author    Dave Airlie <airlied@redhat.com>  2019-10-22 13:51:04 +1000
committer Dave Airlie <airlied@redhat.com>  2019-10-22 13:51:05 +1000
commit    89910e62009a9359e6302f7e036a0a4564869cca (patch)
tree      dfb6d5f1f6a116e06bada6b4f5db2629db9aa367
parent    7ed093602e0e1b60a0fc074a9692687e7d2b723d (diff)
parent    ce53908bba6fa6e905d8fe81da4591d3e7a65878 (diff)
Merge tag 'drm-intel-next-2019-10-21' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
UAPI Changes:

- Introduce a versioning of the i915-perf uapi (Lionel)
- Add support for perf configuration queries (Lionel)
  Allow listing perf configurations with IOCTL in addition to sysfs.
  This is useful in container usecases.
- Allow dynamic reconfiguration of the OA stream (Chris)
  Allows the OA stream to be reconfigured between batch buffers, giving
  greater flexibility in sampling.
- Allow holding preemption on filtered perf ctx
  Allow CAP_ADMIN to block pre-emption of a context to query performance
  counters without disturbances.

Mesa changes: https://gitlab.freedesktop.org/mesa/mesa/merge_requests/932

Cross-subsystem Changes:

- drm-next backmerge for HDR DP changes
  https://lists.freedesktop.org/archives/dri-devel/2019-September/236453.html

Driver Changes:

- Add DC3CO sleep state for Tigerlake (Anshuman)
- Tigerlake BCS engine support engine relative MMIO (Daniele)
- Simplify the Tigerlake LRC register list for !RCS (Daniele)
- Read SAGV block time from PCODE on Tigerlake (James)
- Add 12 missing Tigerlake workarounds (Mika)
- Enable DDI/Port G for Tigerlake (Khaled)
- Avoid hang in tsg,vfe units by keeping l3 clocks ICL+ (Mika)
- Fix Bugzilla #111966: Favor last VBT child device (Ville)
- Fix blue/black screen on boot due to broken gamma (Swati)
- Add support of BT.2020 Colorimetry to DP MSA (Gwan-gyeong)
- Attach colorspace property to DP connector (Gwan-gyeong)
- Attach HDR metadata property to DP connector (Gwan-gyeong)
- Base intel_memory_region support prep for local memory (Matt A)
- Introduce Jasper Lake PCH (Matt R)
- Support multiple GPUs in PMU (Tvrtko)
- Fix MST oops due to MSA changes (Ville)
- Refuse modes with hdisplay==4096 on pre-HSW DP (Ville)
- Correct the PCH type in irq postinstall for JSP (Vivek)
- Save Master transcoder in slave's crtc_state for Transcoder Port Sync (Manasi)
- Enable TRANSCODER PORT SYNC for tiled displays across separate ports (Manasi)
- HW state readout for transcoder port sync config (Manasi)
- Enable master-slaves in trans port sync (Manasi)
- In port sync mode disable slaves first then master (Manasi)
- Fix port checks for MST support on gen >= 11 (Lucas)
- Flush submission tasklet before waiting/retiring (Chris)
- Flush tasklet submission before sleeping on i915_request_wait (Chris)
- Object pin reference counting fixes (Chris, Matt A)
- Clear semaphore immediately upon ELSP promotion (Chris)
- Child device size remains unchanged through VBT 229 (Matt R)
- Restore dropped 'interruptible' flag on retiring requests (Chris)
- Treat a busy timeline as 'active' while waiting (Chris)
- Clean up struct_mutex from perf (Chris)
- Update locking around execlists->active (Chris)
- Mark up expected execlist state during reset (Chris)
- Remove cursor use of properties for coordinates (Maarten)
- Only mark incomplete requests as -EIO on cancelling (Chris)
- Add an rcu_barrier option to i915_drop_caches (Chris)
- Replace perf global wakeref tracking with engine-pm (Chris)
- Prevent merging requests with conflicting flags (Chris)
- Allow for CS OA configs to be created lazily (Lionel)
- Implement active wait for noa configurations (Lionel)
- Execute OA configuration from command stream (Lionel)
- Prefer using the pinned_ctx for emitting delays on config (Chris)
- Port C's hotplug interrupt is associated with TC1 bits (Vivek, Matt R)
- Extend program of VSC Header and DB for Colorimetry Format (Gwan-gyeong)
- Fine-tune timeslicing of contexts (Chris)
- Do initial mocs configuration directly (Chris)
- Fix uninitialized variable on PMU error path (Tvrtko)
- Don't disable interrupts independently of the locking (Sebastian)
- Eliminate struct_mutex from GVT (Chris)
- Move perf types to their own header (Lionel)
- Drop list of perf streams (always size 1) (Lionel)
- Store the perf associated engine of a stream (Lionel)
- Make array hw_engine_mask static (Colin)
- Prefer shortest path to RPM/perf/GT instead of dev_priv (Chris, Tvrtko)
- Virtual request submission fixes (Chris)
- Selftest/CI improvements (Chris)
- Fix Kconfig indentation (Krzysztof)
- Give engine->kernel_context distinct timeline lock classes (Chris)
- Fix null pointer deref on selftest error path (Colin)
- Select DPLL's via mask (Matt R)
- Introduce and use intel_atomic_crtc_state_for_each_plane_state (Maarten)
- Use intel_plane_state in prepare and cleanup plane_fb (Maarten)
- Remove begin/finish_crtc_commit (Maarten)
- Move SAGV block time to dev_priv (James)
- Avoid polluting the i915_oa_config with error pointers (Chris)
- Squelch display kerneldoc warnings (Chris)
- Assert tasklet is locked for process_csb() (Chris)
- Switch to using DP_MSA_MISC_* defines (Ville)
- Stop using drm_atomic_helper_check_planes() (Ville)
- Make .modeset_calc_cdclk() mandatory (Ville)
- Use drm_rect_translate_to()/drm_rect_init() (Ville)
- Refactor timestamping constants update (Ville)
- Switch intel_legacy_cursor_update() to intel_ types (Ville)
- Prepare the connector/encoder mask readout for hw vs. uapi state split (Ville)
- Prepare the mode readout for hw vs. uapi state split (Ville)
- Move swizzle_bit under i915_ggtt (Chris)
- Improve microcontrollers documentation (Daniele)
- Move the cursor rotation handling into intel_cursor_check_surface() (Ville)
- Cleanups to pipe code (Ville)
- Shrink eDRAM ways/sets arrays for code size (Ville)
- Cleanups to HDCP2 timeout code (Ville)
- Restore full symmetry in i915_driver_modeset_probe/remove (Janusz)
- Simplify setting of ddi_io_power_domain (Lucas)
- Add pipe id/name to pipe mismatch logs (Lucas)
- Prettify MST debug message (Lucas)
- Extract GT ring management to separate files (Andi)

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191021180337.GA24338@jlahtine-desk.ger.corp.intel.com
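The perf configuration query mentioned above rides on the existing i915 query ioctl. Below is a minimal userspace sketch of listing the available configs, assuming the DRM_I915_QUERY_PERF_CONFIG / DRM_I915_QUERY_PERF_CONFIG_LIST names and the drm_i915_query_perf_config structure this series adds to include/uapi/drm/i915_drm.h; error handling is trimmed and the code is illustrative only, using the usual two-pass size-then-data query pattern.

/*
 * Sketch: list i915 perf configurations via DRM_IOCTL_I915_QUERY.
 * Not taken from this patch; uapi names assumed from i915_drm.h.
 */
#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static struct drm_i915_query_perf_config *list_perf_configs(int drm_fd)
{
	struct drm_i915_query_item item = {
		.query_id = DRM_I915_QUERY_PERF_CONFIG,
		.flags = DRM_I915_QUERY_PERF_CONFIG_LIST,
	};
	struct drm_i915_query query = {
		.num_items = 1,
		.items_ptr = (uintptr_t)&item,
	};
	struct drm_i915_query_perf_config *configs;

	/* First pass: item.length == 0 asks the kernel for the buffer size. */
	if (ioctl(drm_fd, DRM_IOCTL_I915_QUERY, &query) || item.length <= 0)
		return NULL;

	configs = calloc(1, item.length);
	item.data_ptr = (uintptr_t)configs;

	/* Second pass: the returned blob starts with n_configs, followed by IDs. */
	if (ioctl(drm_fd, DRM_IOCTL_I915_QUERY, &query)) {
		free(configs);
		return NULL;
	}
	return configs;
}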
-rw-r--r-- Documentation/gpu/i915.rst | 48
-rw-r--r-- drivers/gpu/drm/i915/Kconfig | 12
-rw-r--r-- drivers/gpu/drm/i915/Kconfig.debug | 144
-rw-r--r-- drivers/gpu/drm/i915/Makefile | 3
-rw-r--r-- drivers/gpu/drm/i915/display/intel_atomic_plane.c | 10
-rw-r--r-- drivers/gpu/drm/i915/display/intel_atomic_plane.h | 2
-rw-r--r-- drivers/gpu/drm/i915/display/intel_bios.c | 28
-rw-r--r-- drivers/gpu/drm/i915/display/intel_cdclk.c | 31
-rw-r--r-- drivers/gpu/drm/i915/display/intel_color.c | 33
-rw-r--r-- drivers/gpu/drm/i915/display/intel_connector.c | 21
-rw-r--r-- drivers/gpu/drm/i915/display/intel_ddi.c | 178
-rw-r--r-- drivers/gpu/drm/i915/display/intel_ddi.h | 3
-rw-r--r-- drivers/gpu/drm/i915/display/intel_display.c | 1023
-rw-r--r-- drivers/gpu/drm/i915/display/intel_display.h | 17
-rw-r--r-- drivers/gpu/drm/i915/display/intel_display_power.c | 154
-rw-r--r-- drivers/gpu/drm/i915/display/intel_display_power.h | 3
-rw-r--r-- drivers/gpu/drm/i915/display/intel_display_types.h | 7
-rw-r--r-- drivers/gpu/drm/i915/display/intel_dp.c | 235
-rw-r--r-- drivers/gpu/drm/i915/display/intel_dp.h | 8
-rw-r--r-- drivers/gpu/drm/i915/display/intel_dp_mst.c | 22
-rw-r--r-- drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 48
-rw-r--r-- drivers/gpu/drm/i915/display/intel_hdmi.c | 55
-rw-r--r-- drivers/gpu/drm/i915/display/intel_psr.c | 114
-rw-r--r-- drivers/gpu/drm/i915/display/intel_sprite.c | 6
-rw-r--r-- drivers/gpu/drm/i915/display/intel_vbt_defs.h | 3
-rw-r--r-- drivers/gpu/drm/i915/gem/i915_gem_context.c | 13
-rw-r--r-- drivers/gpu/drm/i915/gem/i915_gem_context.h | 18
-rw-r--r-- drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 1
-rw-r--r-- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 3
-rw-r--r-- drivers/gpu/drm/i915/gem/i915_gem_internal.c | 17
-rw-r--r-- drivers/gpu/drm/i915/gem/i915_gem_object.h | 23
-rw-r--r-- drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 20
-rw-r--r-- drivers/gpu/drm/i915/gem/i915_gem_pages.c | 6
-rw-r--r-- drivers/gpu/drm/i915/gem/i915_gem_phys.c | 5
-rw-r--r-- drivers/gpu/drm/i915/gem/i915_gem_region.c | 174
-rw-r--r-- drivers/gpu/drm/i915/gem/i915_gem_region.h | 29
-rw-r--r-- drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 79
-rw-r--r-- drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 65
-rw-r--r-- drivers/gpu/drm/i915/gem/i915_gem_stolen.h | 3
-rw-r--r-- drivers/gpu/drm/i915/gem/i915_gem_tiling.c | 8
-rw-r--r-- drivers/gpu/drm/i915/gem/selftests/huge_pages.c | 97
-rw-r--r-- drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c | 2
-rw-r--r-- drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 8
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_context.c | 2
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_engine.h | 17
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 101
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 2
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_engine_types.h | 1
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_engine_user.c | 18
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 7
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_gt.c | 11
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 21
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_gt_requests.c | 18
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_gt_types.h | 7
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_hangcheck.c | 10
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_llc.c | 161
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_llc.h | 15
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_llc_types.h | 13
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_lrc.c | 212
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_mocs.c | 274
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_mocs.h | 3
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_rc6.c | 12
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_reset.c | 52
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 6
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_workarounds.c | 38
-rw-r--r-- drivers/gpu/drm/i915/gt/mock_engine.c | 6
-rw-r--r-- drivers/gpu/drm/i915/gt/selftest_context.c | 6
-rw-r--r-- drivers/gpu/drm/i915/gt/selftest_engine_pm.c | 2
-rw-r--r-- drivers/gpu/drm/i915/gt/selftest_gt_pm.c | 9
-rw-r--r-- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 24
-rw-r--r-- drivers/gpu/drm/i915/gt/selftest_llc.c | 77
-rw-r--r-- drivers/gpu/drm/i915/gt/selftest_llc.h | 14
-rw-r--r-- drivers/gpu/drm/i915/gt/selftest_lrc.c | 912
-rw-r--r-- drivers/gpu/drm/i915/gt/selftest_reset.c | 10
-rw-r--r-- drivers/gpu/drm/i915/gt/selftest_timeline.c | 50
-rw-r--r-- drivers/gpu/drm/i915/gt/selftest_workarounds.c | 211
-rw-r--r-- drivers/gpu/drm/i915/gt/uc/intel_guc.c | 31
-rw-r--r-- drivers/gpu/drm/i915/gt/uc/intel_guc_log.c | 3
-rw-r--r-- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 12
-rw-r--r-- drivers/gpu/drm/i915/gt/uc/intel_huc.c | 37
-rw-r--r-- drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c | 15
-rw-r--r-- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 2
-rw-r--r-- drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h | 3
-rw-r--r-- drivers/gpu/drm/i915/gt/uc/selftest_guc.c | 42
-rw-r--r-- drivers/gpu/drm/i915/gvt/aperture_gm.c | 2
-rw-r--r-- drivers/gpu/drm/i915/gvt/execlist.c | 4
-rw-r--r-- drivers/gpu/drm/i915/gvt/scheduler.c | 18
-rw-r--r-- drivers/gpu/drm/i915/i915_active.c | 4
-rw-r--r-- drivers/gpu/drm/i915/i915_debugfs.c | 50
-rw-r--r-- drivers/gpu/drm/i915/i915_drv.c | 12
-rw-r--r-- drivers/gpu/drm/i915/i915_drv.h | 401
-rw-r--r-- drivers/gpu/drm/i915/i915_gem.c | 20
-rw-r--r-- drivers/gpu/drm/i915/i915_gem.h | 36
-rw-r--r-- drivers/gpu/drm/i915/i915_gem_fence_reg.c | 89
-rw-r--r-- drivers/gpu/drm/i915/i915_gem_fence_reg.h | 7
-rw-r--r-- drivers/gpu/drm/i915/i915_gem_gtt.c | 65
-rw-r--r-- drivers/gpu/drm/i915/i915_gem_gtt.h | 5
-rw-r--r-- drivers/gpu/drm/i915/i915_getparam.c | 4
-rw-r--r-- drivers/gpu/drm/i915/i915_irq.c | 48
-rw-r--r-- drivers/gpu/drm/i915/i915_params.c | 3
-rw-r--r-- drivers/gpu/drm/i915/i915_pci.c | 29
-rw-r--r-- drivers/gpu/drm/i915/i915_perf.c | 1374
-rw-r--r-- drivers/gpu/drm/i915/i915_perf.h | 27
-rw-r--r-- drivers/gpu/drm/i915/i915_perf_types.h | 406
-rw-r--r-- drivers/gpu/drm/i915/i915_pmu.c | 37
-rw-r--r-- drivers/gpu/drm/i915/i915_pmu.h | 4
-rw-r--r-- drivers/gpu/drm/i915/i915_query.c | 296
-rw-r--r-- drivers/gpu/drm/i915/i915_reg.h | 38
-rw-r--r-- drivers/gpu/drm/i915/i915_request.c | 15
-rw-r--r-- drivers/gpu/drm/i915/i915_request.h | 10
-rw-r--r-- drivers/gpu/drm/i915/i915_scheduler.c | 17
-rw-r--r-- drivers/gpu/drm/i915/i915_scheduler.h | 18
-rw-r--r-- drivers/gpu/drm/i915/i915_scheduler_types.h | 9
-rw-r--r-- drivers/gpu/drm/i915/i915_vma.c | 10
-rw-r--r-- drivers/gpu/drm/i915/intel_device_info.h | 2
-rw-r--r-- drivers/gpu/drm/i915/intel_memory_region.c | 213
-rw-r--r-- drivers/gpu/drm/i915/intel_memory_region.h | 120
-rw-r--r-- drivers/gpu/drm/i915/intel_pch.c | 6
-rw-r--r-- drivers/gpu/drm/i915/intel_pch.h | 5
-rw-r--r-- drivers/gpu/drm/i915/intel_pm.c | 371
-rw-r--r-- drivers/gpu/drm/i915/selftests/i915_gem.c | 2
-rw-r--r-- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 5
-rw-r--r-- drivers/gpu/drm/i915/selftests/i915_live_selftests.h | 1
-rw-r--r-- drivers/gpu/drm/i915/selftests/i915_mock_selftests.h | 1
-rw-r--r-- drivers/gpu/drm/i915/selftests/i915_perf.c | 216
-rw-r--r-- drivers/gpu/drm/i915/selftests/i915_request.c | 287
-rw-r--r-- drivers/gpu/drm/i915/selftests/igt_reset.c | 4
-rw-r--r-- drivers/gpu/drm/i915/selftests/intel_memory_region.c | 282
-rw-r--r-- drivers/gpu/drm/i915/selftests/mock_gem_device.c | 12
-rw-r--r-- drivers/gpu/drm/i915/selftests/mock_region.c | 59
-rw-r--r-- drivers/gpu/drm/i915/selftests/mock_region.h | 16
-rw-r--r-- drivers/gpu/drm/i915/selftests/mock_uncore.c | 5
-rw-r--r-- drivers/gpu/drm/i915/selftests/mock_uncore.h | 3
134 files changed, 7242 insertions, 2779 deletions
diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst
index 465779670fd4..60bd6e6403da 100644
--- a/Documentation/gpu/i915.rst
+++ b/Documentation/gpu/i915.rst
@@ -415,6 +415,15 @@ Object Tiling IOCTLs
.. kernel-doc:: drivers/gpu/drm/i915/gem/i915_gem_tiling.c
:doc: buffer object tiling
+Microcontrollers
+================
+
+Starting from gen9, three microcontrollers are available on the HW: the
+graphics microcontroller (GuC), the HEVC/H.265 microcontroller (HuC) and the
+display microcontroller (DMC). The driver is responsible for loading the
+firmwares on the microcontrollers; the GuC and HuC firmwares are transferred
+to WOPCM using the DMA engine, while the DMC firmware is written through MMIO.
+
WOPCM
-----
@@ -427,12 +436,24 @@ WOPCM Layout
GuC
---
-Firmware Layout
-~~~~~~~~~~~~~~~
+.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_guc.c
+ :doc: GuC
+
+GuC Firmware Layout
+~~~~~~~~~~~~~~~~~~~
.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h
:doc: Firmware Layout
+GuC Memory Management
+~~~~~~~~~~~~~~~~~~~~~
+
+.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_guc.c
+ :doc: GuC Memory Management
+.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_guc.c
+ :functions: intel_guc_allocate_vma
+
+
GuC-specific firmware loader
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -448,11 +469,26 @@ GuC-based command submission
.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
:internal:
-GuC Address Space
-~~~~~~~~~~~~~~~~~
+HuC
+---
+.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_huc.c
+ :doc: HuC
+.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_huc.c
+ :functions: intel_huc_auth
+
+HuC Memory Management
+~~~~~~~~~~~~~~~~~~~~~
-.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_guc.c
- :doc: GuC Address Space
+.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_huc.c
+ :doc: HuC Memory Management
+
+HuC Firmware Layout
+~~~~~~~~~~~~~~~~~~~
+The HuC FW layout is the same as the GuC one, see `GuC Firmware Layout`_
+
+DMC
+---
+See `CSR firmware support for DMC`_
Tracing
=======
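For readers unfamiliar with the ":doc:" directives used in the hunks above: each one pulls a kernel-doc DOC: section out of the named source file into the rendered documentation. An illustrative sketch of the comment shape such a directive picks up follows; the wording is a placeholder, the real text lives in drivers/gpu/drm/i915/gt/uc/intel_guc.c.

/**
 * DOC: GuC
 *
 * Placeholder overview paragraph. A block like this in intel_guc.c is what
 * the ".. kernel-doc:: ... :doc: GuC" directive above renders into i915.rst.
 */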
diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 0d21402945ab..3c6d57df262d 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -76,7 +76,7 @@ config DRM_I915_CAPTURE_ERROR
This option enables capturing the GPU state when a hang is detected.
This information is vital for triaging hangs and assists in debugging.
Please report any hang to
- https://bugs.freedesktop.org/enter_bug.cgi?product=DRI
+ https://bugs.freedesktop.org/enter_bug.cgi?product=DRI
for triaging.
If in doubt, say "Y".
@@ -105,11 +105,11 @@ config DRM_I915_USERPTR
If in doubt, say "Y".
config DRM_I915_GVT
- bool "Enable Intel GVT-g graphics virtualization host support"
- depends on DRM_I915
- depends on 64BIT
- default n
- help
+ bool "Enable Intel GVT-g graphics virtualization host support"
+ depends on DRM_I915
+ depends on 64BIT
+ default n
+ help
Choose this option if you want to enable Intel GVT-g graphics
virtualization technology host support with integrated graphics.
With GVT-g, it's possible to have one integrated graphics
diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
index 00786a142ff0..eea79125b3ea 100644
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -1,34 +1,34 @@
# SPDX-License-Identifier: GPL-2.0-only
config DRM_I915_WERROR
- bool "Force GCC to throw an error instead of a warning when compiling"
- # As this may inadvertently break the build, only allow the user
- # to shoot oneself in the foot iff they aim really hard
- depends on EXPERT
- # We use the dependency on !COMPILE_TEST to not be enabled in
- # allmodconfig or allyesconfig configurations
- depends on !COMPILE_TEST
+ bool "Force GCC to throw an error instead of a warning when compiling"
+ # As this may inadvertently break the build, only allow the user
+ # to shoot oneself in the foot iff they aim really hard
+ depends on EXPERT
+ # We use the dependency on !COMPILE_TEST to not be enabled in
+ # allmodconfig or allyesconfig configurations
+ depends on !COMPILE_TEST
select HEADER_TEST
- default n
- help
- Add -Werror to the build flags for (and only for) i915.ko.
- Do not enable this unless you are writing code for the i915.ko module.
+ default n
+ help
+ Add -Werror to the build flags for (and only for) i915.ko.
+ Do not enable this unless you are writing code for the i915.ko module.
- Recommended for driver developers only.
+ Recommended for driver developers only.
- If in doubt, say "N".
+ If in doubt, say "N".
config DRM_I915_DEBUG
- bool "Enable additional driver debugging"
- depends on DRM_I915
- select DEBUG_FS
- select PREEMPT_COUNT
- select REFCOUNT_FULL
- select I2C_CHARDEV
- select STACKDEPOT
- select DRM_DP_AUX_CHARDEV
- select X86_MSR # used by igt/pm_rpm
- select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks)
- select DRM_DEBUG_MM if DRM=y
+ bool "Enable additional driver debugging"
+ depends on DRM_I915
+ select DEBUG_FS
+ select PREEMPT_COUNT
+ select REFCOUNT_FULL
+ select I2C_CHARDEV
+ select STACKDEPOT
+ select DRM_DP_AUX_CHARDEV
+ select X86_MSR # used by igt/pm_rpm
+ select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks)
+ select DRM_DEBUG_MM if DRM=y
select DRM_DEBUG_SELFTEST
select DMABUF_SELFTESTS
select SW_SYNC # signaling validation framework (igt/syncobj*)
@@ -36,14 +36,14 @@ config DRM_I915_DEBUG
select DRM_I915_SELFTEST
select DRM_I915_DEBUG_RUNTIME_PM
select DRM_I915_DEBUG_MMIO
- default n
- help
- Choose this option to turn on extra driver debugging that may affect
- performance but will catch some internal issues.
+ default n
+ help
+ Choose this option to turn on extra driver debugging that may affect
+ performance but will catch some internal issues.
- Recommended for driver developers only.
+ Recommended for driver developers only.
- If in doubt, say "N".
+ If in doubt, say "N".
config DRM_I915_DEBUG_MMIO
bool "Always insert extra checks around mmio access by default"
@@ -59,16 +59,16 @@ config DRM_I915_DEBUG_MMIO
If in doubt, say "N".
config DRM_I915_DEBUG_GEM
- bool "Insert extra checks into the GEM internals"
- default n
- depends on DRM_I915_WERROR
- help
- Enable extra sanity checks (including BUGs) along the GEM driver
- paths that may slow the system down and if hit hang the machine.
+ bool "Insert extra checks into the GEM internals"
+ default n
+ depends on DRM_I915_WERROR
+ help
+ Enable extra sanity checks (including BUGs) along the GEM driver
+ paths that may slow the system down and if hit hang the machine.
- Recommended for driver developers only.
+ Recommended for driver developers only.
- If in doubt, say "N".
+ If in doubt, say "N".
config DRM_I915_ERRLOG_GEM
bool "Insert extra logging (very verbose) for common GEM errors"
@@ -111,41 +111,41 @@ config DRM_I915_TRACE_GTT
If in doubt, say "N".
config DRM_I915_SW_FENCE_DEBUG_OBJECTS
- bool "Enable additional driver debugging for fence objects"
- depends on DRM_I915
- select DEBUG_OBJECTS
- default n
- help
- Choose this option to turn on extra driver debugging that may affect
- performance but will catch some internal issues.
+ bool "Enable additional driver debugging for fence objects"
+ depends on DRM_I915
+ select DEBUG_OBJECTS
+ default n
+ help
+ Choose this option to turn on extra driver debugging that may affect
+ performance but will catch some internal issues.
- Recommended for driver developers only.
+ Recommended for driver developers only.
- If in doubt, say "N".
+ If in doubt, say "N".
config DRM_I915_SW_FENCE_CHECK_DAG
- bool "Enable additional driver debugging for detecting dependency cycles"
- depends on DRM_I915
- default n
- help
- Choose this option to turn on extra driver debugging that may affect
- performance but will catch some internal issues.
+ bool "Enable additional driver debugging for detecting dependency cycles"
+ depends on DRM_I915
+ default n
+ help
+ Choose this option to turn on extra driver debugging that may affect
+ performance but will catch some internal issues.
- Recommended for driver developers only.
+ Recommended for driver developers only.
- If in doubt, say "N".
+ If in doubt, say "N".
config DRM_I915_DEBUG_GUC
- bool "Enable additional driver debugging for GuC"
- depends on DRM_I915
- default n
- help
- Choose this option to turn on extra driver debugging that may affect
- performance but will help resolve GuC related issues.
+ bool "Enable additional driver debugging for GuC"
+ depends on DRM_I915
+ default n
+ help
+ Choose this option to turn on extra driver debugging that may affect
+ performance but will help resolve GuC related issues.
- Recommended for driver developers only.
+ Recommended for driver developers only.
- If in doubt, say "N".
+ If in doubt, say "N".
config DRM_I915_SELFTEST
bool "Enable selftests upon driver load"
@@ -178,15 +178,15 @@ config DRM_I915_SELFTEST_BROKEN
If in doubt, say "N".
config DRM_I915_LOW_LEVEL_TRACEPOINTS
- bool "Enable low level request tracing events"
- depends on DRM_I915
- default n
- help
- Choose this option to turn on low level request tracing events.
- This provides the ability to precisely monitor engine utilisation
- and also analyze the request dependency resolving timeline.
-
- If in doubt, say "N".
+ bool "Enable low level request tracing events"
+ depends on DRM_I915
+ default n
+ help
+ Choose this option to turn on low level request tracing events.
+ This provides the ability to precisely monitor engine utilisation
+ and also analyze the request dependency resolving timeline.
+
+ If in doubt, say "N".
config DRM_I915_DEBUG_VBLANK_EVADE
bool "Enable extra debug warnings for vblank evasion"
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index a6006aa715ff..a16a2daef977 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -51,6 +51,7 @@ i915-y += i915_drv.o \
i915_utils.o \
intel_csr.o \
intel_device_info.o \
+ intel_memory_region.o \
intel_pch.o \
intel_pm.o \
intel_runtime_pm.o \
@@ -86,6 +87,7 @@ gt-y += \
gt/intel_gt_pm_irq.o \
gt/intel_gt_requests.o \
gt/intel_hangcheck.o \
+ gt/intel_llc.o \
gt/intel_lrc.o \
gt/intel_rc6.o \
gt/intel_renderstate.o \
@@ -121,6 +123,7 @@ gem-y += \
gem/i915_gem_pages.o \
gem/i915_gem_phys.o \
gem/i915_gem_pm.o \
+ gem/i915_gem_region.o \
gem/i915_gem_shmem.o \
gem/i915_gem_shrinker.o \
gem/i915_gem_stolen.o \
diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.c b/drivers/gpu/drm/i915/display/intel_atomic_plane.c
index 98b7766eaa7a..a6cff5a160fb 100644
--- a/drivers/gpu/drm/i915/display/intel_atomic_plane.c
+++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.c
@@ -195,14 +195,11 @@ get_crtc_from_states(const struct intel_plane_state *old_plane_state,
return NULL;
}
-static int intel_plane_atomic_check(struct drm_plane *_plane,
- struct drm_plane_state *_new_plane_state)
+int intel_plane_atomic_check(struct intel_atomic_state *state,
+ struct intel_plane *plane)
{
- struct intel_plane *plane = to_intel_plane(_plane);
- struct intel_atomic_state *state =
- to_intel_atomic_state(_new_plane_state->state);
struct intel_plane_state *new_plane_state =
- to_intel_plane_state(_new_plane_state);
+ intel_atomic_get_new_plane_state(state, plane);
const struct intel_plane_state *old_plane_state =
intel_atomic_get_old_plane_state(state, plane);
struct intel_crtc *crtc =
@@ -369,5 +366,4 @@ void i9xx_update_planes_on_crtc(struct intel_atomic_state *state,
const struct drm_plane_helper_funcs intel_plane_helper_funcs = {
.prepare_fb = intel_prepare_plane_fb,
.cleanup_fb = intel_cleanup_plane_fb,
- .atomic_check = intel_plane_atomic_check,
};
diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.h b/drivers/gpu/drm/i915/display/intel_atomic_plane.h
index cb7ef4f9eafd..dc85af02e9b7 100644
--- a/drivers/gpu/drm/i915/display/intel_atomic_plane.h
+++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.h
@@ -41,6 +41,8 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
struct intel_crtc_state *crtc_state,
const struct intel_plane_state *old_plane_state,
struct intel_plane_state *intel_state);
+int intel_plane_atomic_check(struct intel_atomic_state *state,
+ struct intel_plane *plane);
int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_state,
struct intel_crtc_state *crtc_state,
const struct intel_plane_state *old_plane_state,
diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c
index 1def550c68c8..63c1bd4c2954 100644
--- a/drivers/gpu/drm/i915/display/intel_bios.c
+++ b/drivers/gpu/drm/i915/display/intel_bios.c
@@ -1270,7 +1270,7 @@ static void sanitize_ddc_pin(struct drm_i915_private *dev_priv,
DRM_DEBUG_KMS("port %c trying to use the same DDC pin (0x%x) as port %c, "
"disabling port %c DVI/HDMI support\n",
port_name(port), info->alternate_ddc_pin,
- port_name(p), port_name(port));
+ port_name(p), port_name(p));
/*
* If we have multiple ports supposedly sharing the
@@ -1278,9 +1278,14 @@ static void sanitize_ddc_pin(struct drm_i915_private *dev_priv,
* port. Otherwise they share the same ddc bin and
* system couldn't communicate with them separately.
*
- * Give child device order the priority, first come first
- * served.
+ * Give inverse child device order the priority,
+ * last one wins. Yes, there are real machines
+ * (eg. Asrock B250M-HDV) where VBT has both
+ * port A and port E with the same AUX ch and
+ * we must pick port E :(
*/
+ info = &dev_priv->vbt.ddi_port_info[p];
+
info->supports_dvi = false;
info->supports_hdmi = false;
info->alternate_ddc_pin = 0;
@@ -1316,7 +1321,7 @@ static void sanitize_aux_ch(struct drm_i915_private *dev_priv,
DRM_DEBUG_KMS("port %c trying to use the same AUX CH (0x%x) as port %c, "
"disabling port %c DP support\n",
port_name(port), info->alternate_aux_channel,
- port_name(p), port_name(port));
+ port_name(p), port_name(p));
/*
* If we have multiple ports supposedlt sharing the
@@ -1324,9 +1329,14 @@ static void sanitize_aux_ch(struct drm_i915_private *dev_priv,
* port. Otherwise they share the same aux channel
* and system couldn't communicate with them separately.
*
- * Give child device order the priority, first come first
- * served.
+ * Give inverse child device order the priority,
+ * last one wins. Yes, there are real machines
+ * (eg. Asrock B250M-HDV) where VBT has both
+ * port A and port E with the same AUX ch and
+ * we must pick port E :(
*/
+ info = &dev_priv->vbt.ddi_port_info[p];
+
info->supports_dp = false;
info->alternate_aux_channel = 0;
}
@@ -1389,6 +1399,7 @@ static enum port dvo_port_to_port(u8 dvo_port)
[PORT_D] = { DVO_PORT_HDMID, DVO_PORT_DPD, -1},
[PORT_E] = { DVO_PORT_CRT, DVO_PORT_HDMIE, DVO_PORT_DPE},
[PORT_F] = { DVO_PORT_HDMIF, DVO_PORT_DPF, -1},
+ [PORT_G] = { DVO_PORT_HDMIG, DVO_PORT_DPG, -1},
};
enum port port;
int i;
@@ -1615,7 +1626,7 @@ parse_general_definitions(struct drm_i915_private *dev_priv,
expected_size = 37;
} else if (bdb->version <= 215) {
expected_size = 38;
- } else if (bdb->version <= 216) {
+ } else if (bdb->version <= 229) {
expected_size = 39;
} else {
expected_size = sizeof(*child);
@@ -2248,6 +2259,9 @@ enum aux_ch intel_bios_port_aux_ch(struct drm_i915_private *dev_priv,
case DP_AUX_F:
aux_ch = AUX_CH_F;
break;
+ case DP_AUX_G:
+ aux_ch = AUX_CH_G;
+ break;
default:
MISSING_CASE(info->alternate_aux_channel);
aux_ch = AUX_CH_A;
diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c
index 43564295b864..3d867963a6d1 100644
--- a/drivers/gpu/drm/i915/display/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
@@ -1900,9 +1900,11 @@ intel_set_cdclk_post_plane_update(struct drm_i915_private *dev_priv,
intel_set_cdclk(dev_priv, new_state, pipe);
}
-static int intel_pixel_rate_to_cdclk(struct drm_i915_private *dev_priv,
- int pixel_rate)
+static int intel_pixel_rate_to_cdclk(const struct intel_crtc_state *crtc_state)
{
+ struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
+ int pixel_rate = crtc_state->pixel_rate;
+
if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv))
return DIV_ROUND_UP(pixel_rate, 2);
else if (IS_GEN(dev_priv, 9) ||
@@ -1910,6 +1912,8 @@ static int intel_pixel_rate_to_cdclk(struct drm_i915_private *dev_priv,
return pixel_rate;
else if (IS_CHERRYVIEW(dev_priv))
return DIV_ROUND_UP(pixel_rate * 100, 95);
+ else if (crtc_state->double_wide)
+ return DIV_ROUND_UP(pixel_rate * 100, 90 * 2);
else
return DIV_ROUND_UP(pixel_rate * 100, 90);
}
@@ -1923,7 +1927,7 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state)
if (!crtc_state->base.enable)
return 0;
- min_cdclk = intel_pixel_rate_to_cdclk(dev_priv, crtc_state->pixel_rate);
+ min_cdclk = intel_pixel_rate_to_cdclk(crtc_state);
/* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */
if (IS_BROADWELL(dev_priv) && hsw_crtc_state_ips_capable(crtc_state))
@@ -2277,15 +2281,28 @@ static int intel_modeset_all_pipes(struct intel_atomic_state *state)
return 0;
}
+static int fixed_modeset_calc_cdclk(struct intel_atomic_state *state)
+{
+ int min_cdclk;
+
+ /*
+ * We can't change the cdclk frequency, but we still want to
+ * check that the required minimum frequency doesn't exceed
+ * the actual cdclk frequency.
+ */
+ min_cdclk = intel_compute_min_cdclk(state);
+ if (min_cdclk < 0)
+ return min_cdclk;
+
+ return 0;
+}
+
int intel_modeset_calc_cdclk(struct intel_atomic_state *state)
{
struct drm_i915_private *dev_priv = to_i915(state->base.dev);
enum pipe pipe;
int ret;
- if (!dev_priv->display.modeset_calc_cdclk)
- return 0;
-
ret = dev_priv->display.modeset_calc_cdclk(state);
if (ret)
return ret;
@@ -2596,6 +2613,8 @@ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv)
} else if (IS_VALLEYVIEW(dev_priv)) {
dev_priv->display.set_cdclk = vlv_set_cdclk;
dev_priv->display.modeset_calc_cdclk = vlv_modeset_calc_cdclk;
+ } else {
+ dev_priv->display.modeset_calc_cdclk = fixed_modeset_calc_cdclk;
}
if (INTEL_GEN(dev_priv) >= 10 || IS_GEN9_LP(dev_priv))
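For reference, the new double_wide branch in intel_pixel_rate_to_cdclk() above halves the cdclk requirement because a double-wide pipe pushes two pixels per cdclk tick. A standalone sketch of the guardband arithmetic with a hypothetical 200000 kHz pixel clock (not driver code):

/* Sketch of the 90% cdclk guardband math, hypothetical 200000 kHz pixel clock. */
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

static const int min_cdclk_single = DIV_ROUND_UP(200000 * 100, 90);     /* 222223 kHz */
static const int min_cdclk_double = DIV_ROUND_UP(200000 * 100, 90 * 2); /* 111112 kHz */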
diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c
index 9ab34902663e..fa44eb73d088 100644
--- a/drivers/gpu/drm/i915/display/intel_color.c
+++ b/drivers/gpu/drm/i915/display/intel_color.c
@@ -1420,6 +1420,9 @@ static int icl_color_check(struct intel_crtc_state *crtc_state)
static int i9xx_gamma_precision(const struct intel_crtc_state *crtc_state)
{
+ if (!crtc_state->gamma_enable)
+ return 0;
+
switch (crtc_state->gamma_mode) {
case GAMMA_MODE_MODE_8BIT:
return 8;
@@ -1433,6 +1436,9 @@ static int i9xx_gamma_precision(const struct intel_crtc_state *crtc_state)
static int ilk_gamma_precision(const struct intel_crtc_state *crtc_state)
{
+ if (!crtc_state->gamma_enable)
+ return 0;
+
if ((crtc_state->csc_mode & CSC_POSITION_BEFORE_GAMMA) == 0)
return 0;
@@ -1457,6 +1463,9 @@ static int chv_gamma_precision(const struct intel_crtc_state *crtc_state)
static int glk_gamma_precision(const struct intel_crtc_state *crtc_state)
{
+ if (!crtc_state->gamma_enable)
+ return 0;
+
switch (crtc_state->gamma_mode) {
case GAMMA_MODE_MODE_8BIT:
return 8;
@@ -1473,9 +1482,6 @@ int intel_color_get_gamma_bit_precision(const struct intel_crtc_state *crtc_stat
struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
- if (!crtc_state->gamma_enable)
- return 0;
-
if (HAS_GMCH(dev_priv)) {
if (IS_CHERRYVIEW(dev_priv))
return chv_gamma_precision(crtc_state);
@@ -1613,6 +1619,9 @@ i9xx_read_lut_8(const struct intel_crtc_state *crtc_state)
static void i9xx_read_luts(struct intel_crtc_state *crtc_state)
{
+ if (!crtc_state->gamma_enable)
+ return;
+
crtc_state->base.gamma_lut = i9xx_read_lut_8(crtc_state);
}
@@ -1659,6 +1668,9 @@ i965_read_lut_10p6(const struct intel_crtc_state *crtc_state)
static void i965_read_luts(struct intel_crtc_state *crtc_state)
{
+ if (!crtc_state->gamma_enable)
+ return;
+
if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT)
crtc_state->base.gamma_lut = i9xx_read_lut_8(crtc_state);
else
@@ -1701,10 +1713,10 @@ chv_read_cgm_lut(const struct intel_crtc_state *crtc_state)
static void chv_read_luts(struct intel_crtc_state *crtc_state)
{
- if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT)
- crtc_state->base.gamma_lut = i9xx_read_lut_8(crtc_state);
- else
+ if (crtc_state->cgm_mode & CGM_PIPE_MODE_GAMMA)
crtc_state->base.gamma_lut = chv_read_cgm_lut(crtc_state);
+ else
+ i965_read_luts(crtc_state);
}
static struct drm_property_blob *
@@ -1742,6 +1754,12 @@ ilk_read_lut_10(const struct intel_crtc_state *crtc_state)
static void ilk_read_luts(struct intel_crtc_state *crtc_state)
{
+ if (!crtc_state->gamma_enable)
+ return;
+
+ if ((crtc_state->csc_mode & CSC_POSITION_BEFORE_GAMMA) == 0)
+ return;
+
if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT)
crtc_state->base.gamma_lut = i9xx_read_lut_8(crtc_state);
else
@@ -1788,6 +1806,9 @@ glk_read_lut_10(const struct intel_crtc_state *crtc_state, u32 prec_index)
static void glk_read_luts(struct intel_crtc_state *crtc_state)
{
+ if (!crtc_state->gamma_enable)
+ return;
+
if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT)
crtc_state->base.gamma_lut = i9xx_read_lut_8(crtc_state);
else
diff --git a/drivers/gpu/drm/i915/display/intel_connector.c b/drivers/gpu/drm/i915/display/intel_connector.c
index ba2ef165a01a..1133c4e97bb4 100644
--- a/drivers/gpu/drm/i915/display/intel_connector.c
+++ b/drivers/gpu/drm/i915/display/intel_connector.c
@@ -277,7 +277,22 @@ intel_attach_aspect_ratio_property(struct drm_connector *connector)
void
intel_attach_colorspace_property(struct drm_connector *connector)
{
- if (!drm_mode_create_hdmi_colorspace_property(connector))
- drm_object_attach_property(&connector->base,
- connector->colorspace_property, 0);
+ switch (connector->connector_type) {
+ case DRM_MODE_CONNECTOR_HDMIA:
+ case DRM_MODE_CONNECTOR_HDMIB:
+ if (drm_mode_create_hdmi_colorspace_property(connector))
+ return;
+ break;
+ case DRM_MODE_CONNECTOR_DisplayPort:
+ case DRM_MODE_CONNECTOR_eDP:
+ if (drm_mode_create_dp_colorspace_property(connector))
+ return;
+ break;
+ default:
+ DRM_DEBUG_KMS("Colorspace property not supported\n");
+ return;
+ }
+
+ drm_object_attach_property(&connector->base,
+ connector->colorspace_property, 0);
}
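With the connector-type switch above, DP and eDP connectors expose the same "Colorspace" property that HDMI connectors already had. A rough userspace sketch of setting it through libdrm follows; the connector ID and value are placeholders and the code is illustrative, not taken from this patch.

/* Sketch: set the "Colorspace" property on a connector via libdrm. */
#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <xf86drmMode.h>

static int set_colorspace(int fd, uint32_t connector_id, uint64_t value)
{
	drmModeObjectProperties *props =
		drmModeObjectGetProperties(fd, connector_id,
					   DRM_MODE_OBJECT_CONNECTOR);
	int ret = -1;

	for (uint32_t i = 0; props && i < props->count_props; i++) {
		drmModePropertyRes *prop = drmModeGetProperty(fd, props->props[i]);

		/* Match the property by name, then write the requested value. */
		if (prop && !strcmp(prop->name, "Colorspace"))
			ret = drmModeObjectSetProperty(fd, connector_id,
						       DRM_MODE_OBJECT_CONNECTOR,
						       prop->prop_id, value);
		drmModeFreeProperty(prop);
	}
	drmModeFreeObjectProperties(props);
	return ret;
}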
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index 3c1e885e0187..9ba794cb9b4f 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -45,6 +45,7 @@
#include "intel_lspcon.h"
#include "intel_panel.h"
#include "intel_psr.h"
+#include "intel_sprite.h"
#include "intel_tc.h"
#include "intel_vdsc.h"
@@ -1740,7 +1741,8 @@ static void intel_ddi_clock_get(struct intel_encoder *encoder,
hsw_ddi_clock_get(encoder, pipe_config);
}
-void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state)
+void intel_ddi_set_dp_msa(const struct intel_crtc_state *crtc_state,
+ const struct drm_connector_state *conn_state)
{
struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
@@ -1752,20 +1754,20 @@ void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state)
WARN_ON(transcoder_is_dsi(cpu_transcoder));
- temp = TRANS_MSA_SYNC_CLK;
+ temp = DP_MSA_MISC_SYNC_CLOCK;
switch (crtc_state->pipe_bpp) {
case 18:
- temp |= TRANS_MSA_6_BPC;
+ temp |= DP_MSA_MISC_6_BPC;
break;
case 24:
- temp |= TRANS_MSA_8_BPC;
+ temp |= DP_MSA_MISC_8_BPC;
break;
case 30:
- temp |= TRANS_MSA_10_BPC;
+ temp |= DP_MSA_MISC_10_BPC;
break;
case 36:
- temp |= TRANS_MSA_12_BPC;
+ temp |= DP_MSA_MISC_12_BPC;
break;
default:
MISSING_CASE(crtc_state->pipe_bpp);
@@ -1777,7 +1779,7 @@ void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state)
crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB);
if (crtc_state->limited_color_range)
- temp |= TRANS_MSA_CEA_RANGE;
+ temp |= DP_MSA_MISC_COLOR_CEA_RGB;
/*
* As per DP 1.2 spec section 2.3.4.3 while sending
@@ -1785,17 +1787,19 @@ void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state)
* colorspace information.
*/
if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR444)
- temp |= TRANS_MSA_SAMPLING_444 | TRANS_MSA_CLRSP_YCBCR |
- TRANS_MSA_YCBCR_BT709;
+ temp |= DP_MSA_MISC_COLOR_YCBCR_444_BT709;
/*
* As per DP 1.4a spec section 2.2.4.3 [MSA Field for Indication
* of Color Encoding Format and Content Color Gamut] while sending
- * YCBCR 420 signals we should program MSA MISC1 fields which
- * indicate VSC SDP for the Pixel Encoding/Colorimetry Format.
+ * YCBCR 420, HDR BT.2020 signals we should program MSA MISC1 fields
+ * which indicate VSC SDP for the Pixel Encoding/Colorimetry Format.
+ *
+ * FIXME MST doesn't pass in the conn_state
*/
- if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420)
- temp |= TRANS_MSA_USE_VSC_SDP;
+ if (conn_state && intel_dp_needs_vsc_sdp(crtc_state, conn_state))
+ temp |= DP_MSA_MISC_COLOR_VSC_SDP;
+
I915_WRITE(TRANS_MSA_MISC(cpu_transcoder), temp);
}
@@ -3330,6 +3334,86 @@ static void intel_ddi_disable_fec_state(struct intel_encoder *encoder,
POSTING_READ(intel_dp->regs.dp_tp_ctl);
}
+static void
+tgl_clear_psr2_transcoder_exitline(const struct intel_crtc_state *cstate)
+{
+ struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
+ u32 val;
+
+ if (!cstate->dc3co_exitline)
+ return;
+
+ val = I915_READ(EXITLINE(cstate->cpu_transcoder));
+ val &= ~(EXITLINE_MASK | EXITLINE_ENABLE);
+ I915_WRITE(EXITLINE(cstate->cpu_transcoder), val);
+}
+
+static void
+tgl_set_psr2_transcoder_exitline(const struct intel_crtc_state *cstate)
+{
+ u32 val, exit_scanlines;
+ struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
+
+ if (!cstate->dc3co_exitline)
+ return;
+
+ exit_scanlines = cstate->dc3co_exitline;
+ exit_scanlines <<= EXITLINE_SHIFT;
+ val = I915_READ(EXITLINE(cstate->cpu_transcoder));
+ val &= ~(EXITLINE_MASK | EXITLINE_ENABLE);
+ val |= exit_scanlines;
+ val |= EXITLINE_ENABLE;
+ I915_WRITE(EXITLINE(cstate->cpu_transcoder), val);
+}
+
+static void tgl_dc3co_exitline_compute_config(struct intel_encoder *encoder,
+ struct intel_crtc_state *cstate)
+{
+ u32 exit_scanlines;
+ struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
+ u32 crtc_vdisplay = cstate->base.adjusted_mode.crtc_vdisplay;
+
+ cstate->dc3co_exitline = 0;
+
+ if (!(dev_priv->csr.allowed_dc_mask & DC_STATE_EN_DC3CO))
+ return;
+
+ /* B.Specs:49196 DC3CO only works with pipeA and DDIA.*/
+ if (to_intel_crtc(cstate->base.crtc)->pipe != PIPE_A ||
+ encoder->port != PORT_A)
+ return;
+
+ if (!cstate->has_psr2 || !cstate->base.active)
+ return;
+
+ /*
+ * DC3CO Exit time 200us B.Spec 49196
+ * PSR2 transcoder Early Exit scanlines = ROUNDUP(200 / line time) + 1
+ */
+ exit_scanlines =
+ intel_usecs_to_scanlines(&cstate->base.adjusted_mode, 200) + 1;
+
+ if (WARN_ON(exit_scanlines > crtc_vdisplay))
+ return;
+
+ cstate->dc3co_exitline = crtc_vdisplay - exit_scanlines;
+ DRM_DEBUG_KMS("DC3CO exit scanlines %d\n", cstate->dc3co_exitline);
+}
+
+static void tgl_dc3co_exitline_get_config(struct intel_crtc_state *crtc_state)
+{
+ u32 val;
+ struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
+
+ if (INTEL_GEN(dev_priv) < 12)
+ return;
+
+ val = I915_READ(EXITLINE(crtc_state->cpu_transcoder));
+
+ if (val & EXITLINE_ENABLE)
+ crtc_state->dc3co_exitline = val & EXITLINE_MASK;
+}
+
static void tgl_ddi_pre_enable_dp(struct intel_encoder *encoder,
const struct intel_crtc_state *crtc_state,
const struct drm_connector_state *conn_state)
@@ -3342,6 +3426,7 @@ static void tgl_ddi_pre_enable_dp(struct intel_encoder *encoder,
int level = intel_ddi_dp_level(intel_dp);
enum transcoder transcoder = crtc_state->cpu_transcoder;
+ tgl_set_psr2_transcoder_exitline(crtc_state);
intel_dp_set_link_params(intel_dp, crtc_state->port_clock,
crtc_state->lane_count, is_mst);
@@ -3415,7 +3500,8 @@ static void tgl_ddi_pre_enable_dp(struct intel_encoder *encoder,
intel_dp_start_link_train(intel_dp);
/* 7.k */
- intel_dp_stop_link_train(intel_dp);
+ if (!is_trans_port_sync_mode(crtc_state))
+ intel_dp_stop_link_train(intel_dp);
/*
* TODO: enable clock gating
@@ -3489,7 +3575,8 @@ static void hsw_ddi_pre_enable_dp(struct intel_encoder *encoder,
true);
intel_dp_sink_set_fec_ready(intel_dp, crtc_state);
intel_dp_start_link_train(intel_dp);
- if (port != PORT_A || INTEL_GEN(dev_priv) >= 9)
+ if ((port != PORT_A || INTEL_GEN(dev_priv) >= 9) &&
+ !is_trans_port_sync_mode(crtc_state))
intel_dp_stop_link_train(intel_dp);
intel_ddi_enable_fec(encoder, crtc_state);
@@ -3512,6 +3599,8 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder,
tgl_ddi_pre_enable_dp(encoder, crtc_state, conn_state);
else
hsw_ddi_pre_enable_dp(encoder, crtc_state, conn_state);
+
+ intel_ddi_set_dp_msa(crtc_state, conn_state);
}
static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder,
@@ -3666,6 +3755,7 @@ static void intel_ddi_post_disable_dp(struct intel_encoder *encoder,
dig_port->ddi_io_power_domain);
intel_ddi_clk_disable(encoder);
+ tgl_clear_psr2_transcoder_exitline(old_crtc_state);
}
static void intel_ddi_post_disable_hdmi(struct intel_encoder *encoder,
@@ -3768,7 +3858,8 @@ static void intel_enable_ddi_dp(struct intel_encoder *encoder,
intel_edp_backlight_on(crtc_state, conn_state);
intel_psr_enable(intel_dp, crtc_state);
- intel_dp_ycbcr_420_enable(intel_dp, crtc_state);
+ intel_dp_vsc_enable(intel_dp, crtc_state, conn_state);
+ intel_dp_hdr_metadata_enable(intel_dp, crtc_state, conn_state);
intel_edp_drrs_enable(intel_dp, crtc_state);
if (crtc_state->has_audio)
@@ -3926,7 +4017,7 @@ static void intel_ddi_update_pipe_dp(struct intel_encoder *encoder,
{
struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
- intel_ddi_set_pipe_settings(crtc_state);
+ intel_ddi_set_dp_msa(crtc_state, conn_state);
intel_psr_update(intel_dp, crtc_state);
intel_edp_drrs_enable(intel_dp, crtc_state);
@@ -4212,6 +4303,9 @@ void intel_ddi_get_config(struct intel_encoder *encoder,
break;
}
+ if (encoder->type == INTEL_OUTPUT_EDP)
+ tgl_dc3co_exitline_get_config(pipe_config);
+
pipe_config->has_audio =
intel_ddi_is_audio_enabled(dev_priv, cpu_transcoder);
@@ -4289,10 +4383,13 @@ static int intel_ddi_compute_config(struct intel_encoder *encoder,
if (HAS_TRANSCODER_EDP(dev_priv) && port == PORT_A)
pipe_config->cpu_transcoder = TRANSCODER_EDP;
- if (intel_crtc_has_type(pipe_config, INTEL_OUTPUT_HDMI))
+ if (intel_crtc_has_type(pipe_config, INTEL_OUTPUT_HDMI)) {
ret = intel_hdmi_compute_config(encoder, pipe_config, conn_state);
- else
+ } else {
ret = intel_dp_compute_config(encoder, pipe_config, conn_state);
+ tgl_dc3co_exitline_compute_config(encoder, pipe_config);
+ }
+
if (ret)
return ret;
@@ -4661,46 +4758,9 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port)
intel_encoder->update_complete = intel_ddi_update_complete;
}
- switch (port) {
- case PORT_A:
- intel_dig_port->ddi_io_power_domain =
- POWER_DOMAIN_PORT_DDI_A_IO;
- break;
- case PORT_B:
- intel_dig_port->ddi_io_power_domain =
- POWER_DOMAIN_PORT_DDI_B_IO;
- break;
- case PORT_C:
- intel_dig_port->ddi_io_power_domain =
- POWER_DOMAIN_PORT_DDI_C_IO;
- break;
- case PORT_D:
- intel_dig_port->ddi_io_power_domain =
- POWER_DOMAIN_PORT_DDI_D_IO;
- break;
- case PORT_E:
- intel_dig_port->ddi_io_power_domain =
- POWER_DOMAIN_PORT_DDI_E_IO;
- break;
- case PORT_F:
- intel_dig_port->ddi_io_power_domain =
- POWER_DOMAIN_PORT_DDI_F_IO;
- break;
- case PORT_G:
- intel_dig_port->ddi_io_power_domain =
- POWER_DOMAIN_PORT_DDI_G_IO;
- break;
- case PORT_H:
- intel_dig_port->ddi_io_power_domain =
- POWER_DOMAIN_PORT_DDI_H_IO;
- break;
- case PORT_I:
- intel_dig_port->ddi_io_power_domain =
- POWER_DOMAIN_PORT_DDI_I_IO;
- break;
- default:
- MISSING_CASE(port);
- }
+ WARN_ON(port > PORT_I);
+ intel_dig_port->ddi_io_power_domain = POWER_DOMAIN_PORT_DDI_A_IO +
+ port - PORT_A;
if (init_dp) {
if (!intel_ddi_init_dp_connector(intel_dig_port))
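tgl_dc3co_exitline_compute_config() above converts the 200 us DC3CO exit latency into scanlines and programs the exit point that many lines before the end of the vertical active area. A standalone sketch of the same arithmetic, assuming hypothetical 1080p@60 CEA timings (148500 kHz pixel clock, htotal 2200); it mirrors the usual usecs-to-scanlines form and is not driver code:

/* Sketch of the DC3CO exit-line math, hypothetical 1080p@60 timings. */
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

static unsigned int dc3co_exitline(unsigned int clock_khz, unsigned int htotal,
				   unsigned int vdisplay)
{
	/* Scanlines covering 200 us of exit latency, rounded up, plus one. */
	unsigned int exit_scanlines =
		DIV_ROUND_UP(200 * clock_khz, 1000 * htotal) + 1;

	/* 1080p@60: DIV_ROUND_UP(200 * 148500, 1000 * 2200) + 1 = 15, so 1080 - 15 = 1065. */
	return vdisplay - exit_scanlines;
}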
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.h b/drivers/gpu/drm/i915/display/intel_ddi.h
index a08365da2643..19aeab1246ee 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.h
+++ b/drivers/gpu/drm/i915/display/intel_ddi.h
@@ -30,7 +30,8 @@ void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state)
void intel_ddi_disable_transcoder_func(const struct intel_crtc_state *crtc_state);
void intel_ddi_enable_pipe_clock(const struct intel_crtc_state *crtc_state);
void intel_ddi_disable_pipe_clock(const struct intel_crtc_state *crtc_state);
-void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state);
+void intel_ddi_set_dp_msa(const struct intel_crtc_state *crtc_state,
+ const struct drm_connector_state *conn_state);
bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector);
void intel_ddi_get_config(struct intel_encoder *encoder,
struct intel_crtc_state *pipe_config);
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 05fb672a00b9..2912abd85148 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -135,8 +135,6 @@ static void vlv_prepare_pll(struct intel_crtc *crtc,
const struct intel_crtc_state *pipe_config);
static void chv_prepare_pll(struct intel_crtc *crtc,
const struct intel_crtc_state *pipe_config);
-static void intel_begin_crtc_commit(struct intel_atomic_state *, struct intel_crtc *);
-static void intel_finish_crtc_commit(struct intel_atomic_state *, struct intel_crtc *);
static void intel_crtc_init_scalers(struct intel_crtc *crtc,
struct intel_crtc_state *crtc_state);
static void skylake_pfit_enable(const struct intel_crtc_state *crtc_state);
@@ -521,6 +519,20 @@ needs_modeset(const struct intel_crtc_state *state)
return drm_atomic_crtc_needs_modeset(&state->base);
}
+bool
+is_trans_port_sync_mode(const struct intel_crtc_state *crtc_state)
+{
+ return (crtc_state->master_transcoder != INVALID_TRANSCODER ||
+ crtc_state->sync_mode_slaves_mask);
+}
+
+static bool
+is_trans_port_sync_master(const struct intel_crtc_state *crtc_state)
+{
+ return (crtc_state->master_transcoder == INVALID_TRANSCODER &&
+ crtc_state->sync_mode_slaves_mask);
+}
+
/*
* Platform specific helpers to calculate the port PLL loopback- (clock.m),
* and post-divider (clock.p) values, pre- (clock.vco) and post-divided fast
@@ -2735,10 +2747,7 @@ intel_fill_fb_info(struct drm_i915_private *dev_priv,
size++;
/* rotate the x/y offsets to match the GTT view */
- r.x1 = x;
- r.y1 = y;
- r.x2 = x + width;
- r.y2 = y + height;
+ drm_rect_init(&r, x, y, width, height);
drm_rect_rotate(&r,
rot_info->plane[i].width * tile_width,
rot_info->plane[i].height * tile_height,
@@ -2860,10 +2869,7 @@ intel_plane_remap_gtt(struct intel_plane_state *plane_state)
struct drm_rect r;
/* rotate the x/y offsets to match the GTT view */
- r.x1 = x;
- r.y1 = y;
- r.x2 = x + width;
- r.y2 = y + height;
+ drm_rect_init(&r, x, y, width, height);
drm_rect_rotate(&r,
info->plane[i].width * tile_width,
info->plane[i].height * tile_height,
@@ -3478,9 +3484,8 @@ static int skl_check_main_surface(struct intel_plane_state *plane_state)
* Put the final coordinates back so that the src
* coordinate checks will see the right values.
*/
- drm_rect_translate(&plane_state->base.src,
- (x << 16) - plane_state->base.src.x1,
- (y << 16) - plane_state->base.src.y1);
+ drm_rect_translate_to(&plane_state->base.src,
+ x << 16, y << 16);
return 0;
}
@@ -3702,9 +3707,8 @@ int i9xx_check_plane_surface(struct intel_plane_state *plane_state)
* Put the final coordinates back so that the src
* coordinate checks will see the right values.
*/
- drm_rect_translate(&plane_state->base.src,
- (src_x << 16) - plane_state->base.src.x1,
- (src_y << 16) - plane_state->base.src.y1);
+ drm_rect_translate_to(&plane_state->base.src,
+ src_x << 16, src_y << 16);
/* HSW/BDW do this automagically in hardware */
if (!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)) {
@@ -4401,43 +4405,53 @@ static void icl_set_pipe_chicken(struct intel_crtc *crtc)
I915_WRITE(PIPE_CHICKEN(pipe), tmp);
}
-static void intel_update_pipe_config(const struct intel_crtc_state *old_crtc_state,
- const struct intel_crtc_state *new_crtc_state)
+static void icl_enable_trans_port_sync(const struct intel_crtc_state *crtc_state)
{
- struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
+ struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-
- /* drm_atomic_helper_update_legacy_modeset_state might not be called. */
- crtc->base.mode = new_crtc_state->base.mode;
+ u32 trans_ddi_func_ctl2_val;
+ u8 master_select;
/*
- * Update pipe size and adjust fitter if needed: the reason for this is
- * that in compute_mode_changes we check the native mode (not the pfit
- * mode) to see if we can flip rather than do a full mode set. In the
- * fastboot case, we'll flip, but if we don't update the pipesrc and
- * pfit state, we'll end up with a big fb scanned out into the wrong
- * sized surface.
+ * Configure the master select and enable Transcoder Port Sync for
+ * Slave CRTCs transcoder.
*/
+ if (crtc_state->master_transcoder == INVALID_TRANSCODER)
+ return;
- I915_WRITE(PIPESRC(crtc->pipe),
- ((new_crtc_state->pipe_src_w - 1) << 16) |
- (new_crtc_state->pipe_src_h - 1));
+ if (crtc_state->master_transcoder == TRANSCODER_EDP)
+ master_select = 0;
+ else
+ master_select = crtc_state->master_transcoder + 1;
- /* on skylake this is done by detaching scalers */
- if (INTEL_GEN(dev_priv) >= 9) {
- skl_detach_scalers(new_crtc_state);
+ /* Set the master select bits for Tranascoder Port Sync */
+ trans_ddi_func_ctl2_val = (PORT_SYNC_MODE_MASTER_SELECT(master_select) &
+ PORT_SYNC_MODE_MASTER_SELECT_MASK) <<
+ PORT_SYNC_MODE_MASTER_SELECT_SHIFT;
+ /* Enable Transcoder Port Sync */
+ trans_ddi_func_ctl2_val |= PORT_SYNC_MODE_ENABLE;
- if (new_crtc_state->pch_pfit.enabled)
- skylake_pfit_enable(new_crtc_state);
- } else if (HAS_PCH_SPLIT(dev_priv)) {
- if (new_crtc_state->pch_pfit.enabled)
- ironlake_pfit_enable(new_crtc_state);
- else if (old_crtc_state->pch_pfit.enabled)
- ironlake_pfit_disable(old_crtc_state);
- }
+ I915_WRITE(TRANS_DDI_FUNC_CTL2(crtc_state->cpu_transcoder),
+ trans_ddi_func_ctl2_val);
+}
- if (INTEL_GEN(dev_priv) >= 11)
- icl_set_pipe_chicken(crtc);
+static void icl_disable_transcoder_port_sync(const struct intel_crtc_state *old_crtc_state)
+{
+ struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc);
+ struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+ i915_reg_t reg;
+ u32 trans_ddi_func_ctl2_val;
+
+ if (old_crtc_state->master_transcoder == INVALID_TRANSCODER)
+ return;
+
+ DRM_DEBUG_KMS("Disabling Transcoder Port Sync on Slave Transcoder %s\n",
+ transcoder_name(old_crtc_state->cpu_transcoder));
+
+ reg = TRANS_DDI_FUNC_CTL2(old_crtc_state->cpu_transcoder);
+ trans_ddi_func_ctl2_val = ~(PORT_SYNC_MODE_ENABLE |
+ PORT_SYNC_MODE_MASTER_SELECT_MASK);
+ I915_WRITE(reg, trans_ddi_func_ctl2_val);
}
static void intel_fdi_normal_train(struct intel_crtc *crtc)
@@ -6464,6 +6478,9 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config,
if (!transcoder_is_dsi(cpu_transcoder))
intel_set_pipe_timings(pipe_config);
+ if (INTEL_GEN(dev_priv) >= 11)
+ icl_enable_trans_port_sync(pipe_config);
+
intel_set_pipe_src_size(pipe_config);
if (cpu_transcoder != TRANSCODER_EDP &&
@@ -6509,7 +6526,6 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config,
if (INTEL_GEN(dev_priv) >= 11)
icl_set_pipe_chicken(intel_crtc);
- intel_ddi_set_pipe_settings(pipe_config);
if (!transcoder_is_dsi(cpu_transcoder))
intel_ddi_enable_transcoder_func(pipe_config);
@@ -6642,6 +6658,9 @@ static void haswell_crtc_disable(struct intel_crtc_state *old_crtc_state,
if (intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_DP_MST))
intel_ddi_set_vc_payload_alloc(old_crtc_state, false);
+ if (INTEL_GEN(dev_priv) >= 11)
+ icl_disable_transcoder_port_sync(old_crtc_state);
+
if (!transcoder_is_dsi(cpu_transcoder))
intel_ddi_disable_transcoder_func(old_crtc_state);
@@ -6739,6 +6758,8 @@ enum intel_display_power_domain intel_port_to_power_domain(enum port port)
return POWER_DOMAIN_PORT_DDI_E_LANES;
case PORT_F:
return POWER_DOMAIN_PORT_DDI_F_LANES;
+ case PORT_G:
+ return POWER_DOMAIN_PORT_DDI_G_LANES;
default:
MISSING_CASE(port);
return POWER_DOMAIN_PORT_OTHER;
@@ -6762,6 +6783,8 @@ intel_aux_power_domain(struct intel_digital_port *dig_port)
return POWER_DOMAIN_AUX_E_TBT;
case AUX_CH_F:
return POWER_DOMAIN_AUX_F_TBT;
+ case AUX_CH_G:
+ return POWER_DOMAIN_AUX_G_TBT;
default:
MISSING_CASE(dig_port->aux_ch);
return POWER_DOMAIN_AUX_C_TBT;
@@ -6781,6 +6804,8 @@ intel_aux_power_domain(struct intel_digital_port *dig_port)
return POWER_DOMAIN_AUX_E;
case AUX_CH_F:
return POWER_DOMAIN_AUX_F;
+ case AUX_CH_G:
+ return POWER_DOMAIN_AUX_G;
default:
MISSING_CASE(dig_port->aux_ch);
return POWER_DOMAIN_AUX_A;
@@ -8795,6 +8820,7 @@ static bool i9xx_get_pipe_config(struct intel_crtc *crtc,
pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB;
pipe_config->cpu_transcoder = (enum transcoder) crtc->pipe;
pipe_config->shared_dpll = NULL;
+ pipe_config->master_transcoder = INVALID_TRANSCODER;
ret = false;
@@ -9984,6 +10010,7 @@ static bool ironlake_get_pipe_config(struct intel_crtc *crtc,
pipe_config->cpu_transcoder = (enum transcoder) crtc->pipe;
pipe_config->shared_dpll = NULL;
+ pipe_config->master_transcoder = INVALID_TRANSCODER;
ret = false;
tmp = I915_READ(PIPECONF(crtc->pipe));
@@ -10434,6 +10461,59 @@ static void haswell_get_ddi_port_state(struct intel_crtc *crtc,
}
}
+static enum transcoder transcoder_master_readout(struct drm_i915_private *dev_priv,
+ enum transcoder cpu_transcoder)
+{
+ u32 trans_port_sync, master_select;
+
+ trans_port_sync = I915_READ(TRANS_DDI_FUNC_CTL2(cpu_transcoder));
+
+ if ((trans_port_sync & PORT_SYNC_MODE_ENABLE) == 0)
+ return INVALID_TRANSCODER;
+
+ master_select = trans_port_sync &
+ PORT_SYNC_MODE_MASTER_SELECT_MASK;
+ if (master_select == 0)
+ return TRANSCODER_EDP;
+ else
+ return master_select - 1;
+}
+
+static void icelake_get_trans_port_sync_config(struct intel_crtc_state *crtc_state)
+{
+ struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
+ u32 transcoders;
+ enum transcoder cpu_transcoder;
+
+ crtc_state->master_transcoder = transcoder_master_readout(dev_priv,
+ crtc_state->cpu_transcoder);
+
+ transcoders = BIT(TRANSCODER_A) |
+ BIT(TRANSCODER_B) |
+ BIT(TRANSCODER_C) |
+ BIT(TRANSCODER_D);
+ for_each_cpu_transcoder_masked(dev_priv, cpu_transcoder, transcoders) {
+ enum intel_display_power_domain power_domain;
+ intel_wakeref_t trans_wakeref;
+
+ power_domain = POWER_DOMAIN_TRANSCODER(cpu_transcoder);
+ trans_wakeref = intel_display_power_get_if_enabled(dev_priv,
+ power_domain);
+
+ if (!trans_wakeref)
+ continue;
+
+ if (transcoder_master_readout(dev_priv, cpu_transcoder) ==
+ crtc_state->cpu_transcoder)
+ crtc_state->sync_mode_slaves_mask |= BIT(cpu_transcoder);
+
+ intel_display_power_put(dev_priv, power_domain, trans_wakeref);
+ }
+
+ WARN_ON(crtc_state->master_transcoder != INVALID_TRANSCODER &&
+ crtc_state->sync_mode_slaves_mask);
+}
+
static bool haswell_get_pipe_config(struct intel_crtc *crtc,
struct intel_crtc_state *pipe_config)
{
@@ -10445,6 +10525,8 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc,
intel_crtc_init_scalers(crtc, pipe_config);
+ pipe_config->master_transcoder = INVALID_TRANSCODER;
+
power_domain = POWER_DOMAIN_PIPE(crtc->pipe);
wf = intel_display_power_get_if_enabled(dev_priv, power_domain);
if (!wf)
@@ -10553,6 +10635,10 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc,
pipe_config->pixel_multiplier = 1;
}
+ if (INTEL_GEN(dev_priv) >= 11 &&
+ !transcoder_is_dsi(pipe_config->cpu_transcoder))
+ icelake_get_trans_port_sync_config(pipe_config);
+
out:
for_each_power_domain(power_domain, power_domain_mask)
intel_display_power_put(dev_priv,
@@ -10574,21 +10660,13 @@ static u32 intel_cursor_base(const struct intel_plane_state *plane_state)
else
base = intel_plane_ggtt_offset(plane_state);
- base += plane_state->color_plane[0].offset;
-
- /* ILK+ do this automagically */
- if (HAS_GMCH(dev_priv) &&
- plane_state->base.rotation & DRM_MODE_ROTATE_180)
- base += (plane_state->base.crtc_h *
- plane_state->base.crtc_w - 1) * fb->format->cpp[0];
-
- return base;
+ return base + plane_state->color_plane[0].offset;
}
static u32 intel_cursor_position(const struct intel_plane_state *plane_state)
{
- int x = plane_state->base.crtc_x;
- int y = plane_state->base.crtc_y;
+ int x = plane_state->base.dst.x1;
+ int y = plane_state->base.dst.y1;
u32 pos = 0;
if (x < 0) {
@@ -10610,8 +10688,8 @@ static bool intel_cursor_size_ok(const struct intel_plane_state *plane_state)
{
const struct drm_mode_config *config =
&plane_state->base.plane->dev->mode_config;
- int width = plane_state->base.crtc_w;
- int height = plane_state->base.crtc_h;
+ int width = drm_rect_width(&plane_state->base.dst);
+ int height = drm_rect_height(&plane_state->base.dst);
return width > 0 && width <= config->cursor_width &&
height > 0 && height <= config->cursor_height;
@@ -10619,6 +10697,9 @@ static bool intel_cursor_size_ok(const struct intel_plane_state *plane_state)
static int intel_cursor_check_surface(struct intel_plane_state *plane_state)
{
+ struct drm_i915_private *dev_priv =
+ to_i915(plane_state->base.plane->dev);
+ unsigned int rotation = plane_state->base.rotation;
int src_x, src_y;
u32 offset;
int ret;
@@ -10630,8 +10711,8 @@ static int intel_cursor_check_surface(struct intel_plane_state *plane_state)
if (!plane_state->base.visible)
return 0;
- src_x = plane_state->base.src_x >> 16;
- src_y = plane_state->base.src_y >> 16;
+ src_x = plane_state->base.src.x1 >> 16;
+ src_y = plane_state->base.src.y1 >> 16;
intel_add_fb_offsets(&src_x, &src_y, plane_state, 0);
offset = intel_plane_compute_aligned_offset(&src_x, &src_y,
@@ -10642,7 +10723,25 @@ static int intel_cursor_check_surface(struct intel_plane_state *plane_state)
return -EINVAL;
}
+ /*
+ * Put the final coordinates back so that the src
+ * coordinate checks will see the right values.
+ */
+ drm_rect_translate_to(&plane_state->base.src,
+ src_x << 16, src_y << 16);
+
+ /* ILK+ do this automagically in hardware */
+ if (HAS_GMCH(dev_priv) && rotation & DRM_MODE_ROTATE_180) {
+ const struct drm_framebuffer *fb = plane_state->base.fb;
+ int src_w = drm_rect_width(&plane_state->base.src) >> 16;
+ int src_h = drm_rect_height(&plane_state->base.src) >> 16;
+
+ offset += (src_h * src_w - 1) * fb->format->cpp[0];
+ }
+
plane_state->color_plane[0].offset = offset;
+ plane_state->color_plane[0].x = src_x;
+ plane_state->color_plane[0].y = src_y;
return 0;
}
@@ -10666,6 +10765,10 @@ static int intel_check_cursor(struct intel_crtc_state *crtc_state,
if (ret)
return ret;
+ /* Use the unclipped src/dst rectangles, which we program to hw */
+ plane_state->base.src = drm_plane_state_src(&plane_state->base);
+ plane_state->base.dst = drm_plane_state_dest(&plane_state->base);
+
ret = intel_cursor_check_surface(plane_state);
if (ret)
return ret;
@@ -10708,7 +10811,7 @@ static u32 i845_cursor_ctl(const struct intel_crtc_state *crtc_state,
static bool i845_cursor_size_ok(const struct intel_plane_state *plane_state)
{
- int width = plane_state->base.crtc_w;
+ int width = drm_rect_width(&plane_state->base.dst);
/*
* 845g/865g are only limited by the width of their cursors,
@@ -10734,8 +10837,8 @@ static int i845_check_cursor(struct intel_crtc_state *crtc_state,
/* Check for which cursor types we support */
if (!i845_cursor_size_ok(plane_state)) {
DRM_DEBUG("Cursor dimension %dx%d not supported\n",
- plane_state->base.crtc_w,
- plane_state->base.crtc_h);
+ drm_rect_width(&plane_state->base.dst),
+ drm_rect_height(&plane_state->base.dst));
return -EINVAL;
}
@@ -10768,8 +10871,8 @@ static void i845_update_cursor(struct intel_plane *plane,
unsigned long irqflags;
if (plane_state && plane_state->base.visible) {
- unsigned int width = plane_state->base.crtc_w;
- unsigned int height = plane_state->base.crtc_h;
+ unsigned int width = drm_rect_width(&plane_state->base.src);
+ unsigned int height = drm_rect_height(&plane_state->base.dst);
cntl = plane_state->ctl |
i845_cursor_ctl_crtc(crtc_state);
@@ -10871,7 +10974,7 @@ static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state,
if (IS_GEN(dev_priv, 6) || IS_IVYBRIDGE(dev_priv))
cntl |= MCURSOR_TRICKLE_FEED_DISABLE;
- switch (plane_state->base.crtc_w) {
+ switch (drm_rect_width(&plane_state->base.dst)) {
case 64:
cntl |= MCURSOR_MODE_64_ARGB_AX;
break;
@@ -10882,7 +10985,7 @@ static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state,
cntl |= MCURSOR_MODE_256_ARGB_AX;
break;
default:
- MISSING_CASE(plane_state->base.crtc_w);
+ MISSING_CASE(drm_rect_width(&plane_state->base.dst));
return 0;
}
@@ -10896,8 +10999,8 @@ static bool i9xx_cursor_size_ok(const struct intel_plane_state *plane_state)
{
struct drm_i915_private *dev_priv =
to_i915(plane_state->base.plane->dev);
- int width = plane_state->base.crtc_w;
- int height = plane_state->base.crtc_h;
+ int width = drm_rect_width(&plane_state->base.dst);
+ int height = drm_rect_height(&plane_state->base.dst);
if (!intel_cursor_size_ok(plane_state))
return false;
@@ -10950,17 +11053,19 @@ static int i9xx_check_cursor(struct intel_crtc_state *crtc_state,
/* Check for which cursor types we support */
if (!i9xx_cursor_size_ok(plane_state)) {
DRM_DEBUG("Cursor dimension %dx%d not supported\n",
- plane_state->base.crtc_w,
- plane_state->base.crtc_h);
+ drm_rect_width(&plane_state->base.dst),
+ drm_rect_height(&plane_state->base.dst));
return -EINVAL;
}
WARN_ON(plane_state->base.visible &&
plane_state->color_plane[0].stride != fb->pitches[0]);
- if (fb->pitches[0] != plane_state->base.crtc_w * fb->format->cpp[0]) {
+ if (fb->pitches[0] !=
+ drm_rect_width(&plane_state->base.dst) * fb->format->cpp[0]) {
DRM_DEBUG_KMS("Invalid cursor stride (%u) (cursor width %d)\n",
- fb->pitches[0], plane_state->base.crtc_w);
+ fb->pitches[0],
+ drm_rect_width(&plane_state->base.dst));
return -EINVAL;
}
@@ -10975,7 +11080,7 @@ static int i9xx_check_cursor(struct intel_crtc_state *crtc_state,
* Refuse to put the cursor into that compromised position.
*/
if (IS_CHERRYVIEW(dev_priv) && pipe == PIPE_C &&
- plane_state->base.visible && plane_state->base.crtc_x < 0) {
+ plane_state->base.visible && plane_state->base.dst.x1 < 0) {
DRM_DEBUG_KMS("CHV cursor C not allowed to straddle the left screen edge\n");
return -EINVAL;
}
@@ -10995,11 +11100,14 @@ static void i9xx_update_cursor(struct intel_plane *plane,
unsigned long irqflags;
if (plane_state && plane_state->base.visible) {
+ unsigned width = drm_rect_width(&plane_state->base.dst);
+ unsigned height = drm_rect_height(&plane_state->base.dst);
+
cntl = plane_state->ctl |
i9xx_cursor_ctl_crtc(crtc_state);
- if (plane_state->base.crtc_h != plane_state->base.crtc_w)
- fbc_ctl = CUR_FBC_CTL_EN | (plane_state->base.crtc_h - 1);
+ if (width != height)
+ fbc_ctl = CUR_FBC_CTL_EN | (height - 1);
base = intel_cursor_base(plane_state);
pos = intel_cursor_position(plane_state);
@@ -11816,15 +11924,99 @@ static bool c8_planes_changed(const struct intel_crtc_state *new_crtc_state)
return !old_crtc_state->c8_planes != !new_crtc_state->c8_planes;
}
-static int intel_crtc_atomic_check(struct drm_crtc *_crtc,
- struct drm_crtc_state *_crtc_state)
+static int icl_add_sync_mode_crtcs(struct intel_crtc_state *crtc_state)
+{
+ struct drm_crtc *crtc = crtc_state->base.crtc;
+ struct intel_atomic_state *state = to_intel_atomic_state(crtc_state->base.state);
+ struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
+ struct drm_connector *master_connector, *connector;
+ struct drm_connector_state *connector_state;
+ struct drm_connector_list_iter conn_iter;
+ struct drm_crtc *master_crtc = NULL;
+ struct drm_crtc_state *master_crtc_state;
+ struct intel_crtc_state *master_pipe_config;
+ int i, tile_group_id;
+
+ if (INTEL_GEN(dev_priv) < 11)
+ return 0;
+
+ /*
+ * In case of tiled displays there could be one or more slaves but there is
+ * only one master. Let's make the CRTC used by the connector corresponding
+ * to the last horizontal and last vertical tile the master/genlock CRTC.
+ * All the other CRTCs corresponding to the other tiles of the same tile
+ * group are slave CRTCs and hold a pointer to their genlock CRTC.
+ */
+ for_each_new_connector_in_state(&state->base, connector, connector_state, i) {
+ if (connector_state->crtc != crtc)
+ continue;
+ if (!connector->has_tile)
+ continue;
+ if (crtc_state->base.mode.hdisplay != connector->tile_h_size ||
+ crtc_state->base.mode.vdisplay != connector->tile_v_size)
+ return 0;
+ if (connector->tile_h_loc == connector->num_h_tile - 1 &&
+ connector->tile_v_loc == connector->num_v_tile - 1)
+ continue;
+ crtc_state->sync_mode_slaves_mask = 0;
+ tile_group_id = connector->tile_group->id;
+ drm_connector_list_iter_begin(&dev_priv->drm, &conn_iter);
+ drm_for_each_connector_iter(master_connector, &conn_iter) {
+ struct drm_connector_state *master_conn_state = NULL;
+
+ if (!master_connector->has_tile)
+ continue;
+ if (master_connector->tile_h_loc != master_connector->num_h_tile - 1 ||
+ master_connector->tile_v_loc != master_connector->num_v_tile - 1)
+ continue;
+ if (master_connector->tile_group->id != tile_group_id)
+ continue;
+
+ master_conn_state = drm_atomic_get_connector_state(&state->base,
+ master_connector);
+ if (IS_ERR(master_conn_state)) {
+ drm_connector_list_iter_end(&conn_iter);
+ return PTR_ERR(master_conn_state);
+ }
+ if (master_conn_state->crtc) {
+ master_crtc = master_conn_state->crtc;
+ break;
+ }
+ }
+ drm_connector_list_iter_end(&conn_iter);
+
+ if (!master_crtc) {
+ DRM_DEBUG_KMS("Could not find Master CRTC for Slave CRTC %d\n",
+ connector_state->crtc->base.id);
+ return -EINVAL;
+ }
+
+ master_crtc_state = drm_atomic_get_crtc_state(&state->base,
+ master_crtc);
+ if (IS_ERR(master_crtc_state))
+ return PTR_ERR(master_crtc_state);
+
+ master_pipe_config = to_intel_crtc_state(master_crtc_state);
+ crtc_state->master_transcoder = master_pipe_config->cpu_transcoder;
+ master_pipe_config->sync_mode_slaves_mask |=
+ BIT(crtc_state->cpu_transcoder);
+ DRM_DEBUG_KMS("Master Transcoder = %s added for Slave CRTC = %d, slave transcoder bitmask = %d\n",
+ transcoder_name(crtc_state->master_transcoder),
+ crtc_state->base.crtc->base.id,
+ master_pipe_config->sync_mode_slaves_mask);
+ }
+
+ return 0;
+}
+
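For a tiled display the master/genlock CRTC is the one driven by the connector in the last horizontal and last vertical tile position; for example, in a 2x1 tile group the connector at tile (1, 0) becomes the master and the one at (0, 0) its slave. A small illustrative helper (not part of the patch) isolating that test:

static bool connector_is_tile_master(const struct drm_connector *connector)
{
	/* The last tile in both directions drives the genlock/master CRTC. */
	return connector->has_tile &&
		connector->tile_h_loc == connector->num_h_tile - 1 &&
		connector->tile_v_loc == connector->num_v_tile - 1;
}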
+static int intel_crtc_atomic_check(struct intel_atomic_state *state,
+ struct intel_crtc *crtc)
{
- struct intel_crtc *crtc = to_intel_crtc(_crtc);
struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
struct intel_crtc_state *crtc_state =
- to_intel_crtc_state(_crtc_state);
- int ret;
+ intel_atomic_get_new_crtc_state(state, crtc);
bool mode_changed = needs_modeset(crtc_state);
+ int ret;
if (INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv) &&
mode_changed && !crtc_state->base.active)
@@ -11896,10 +12088,6 @@ static int intel_crtc_atomic_check(struct drm_crtc *_crtc,
return ret;
}
-static const struct drm_crtc_helper_funcs intel_helper_funcs = {
- .atomic_check = intel_crtc_atomic_check,
-};
-
static void intel_modeset_update_connector_atomic_state(struct drm_device *dev)
{
struct intel_connector *connector;
@@ -12319,6 +12507,13 @@ clear_intel_crtc_state(struct intel_crtc_state *crtc_state)
if (IS_G4X(dev_priv) ||
IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
saved_state->wm = crtc_state->wm;
+ /*
+ * Save the slave bitmask, which gets filled in on the master crtc state
+ * during the slave's atomic check call.
+ */
+ if (is_trans_port_sync_master(crtc_state))
+ saved_state->sync_mode_slaves_mask =
+ crtc_state->sync_mode_slaves_mask;
/* Keep base drm_crtc_state intact, only clear our extended struct */
BUILD_BUG_ON(offsetof(struct intel_crtc_state, base));
@@ -12412,6 +12607,15 @@ encoder_retry:
drm_mode_set_crtcinfo(&pipe_config->base.adjusted_mode,
CRTC_STEREO_DOUBLE);
+ /* Set the crtc_state defaults for trans_port_sync */
+ pipe_config->master_transcoder = INVALID_TRANSCODER;
+ ret = icl_add_sync_mode_crtcs(pipe_config);
+ if (ret) {
+ DRM_DEBUG_KMS("Cannot assign Sync Mode CRTCs: %d\n",
+ ret);
+ return ret;
+ }
+
/* Pass our mode to the connectors and the CRTC to give them a chance to
* adjust it according to limitations or connector properties, and also
* a chance to reject the mode entirely.
@@ -12558,8 +12762,9 @@ pipe_config_infoframe_mismatch(struct drm_i915_private *dev_priv,
}
}
-static void __printf(3, 4)
-pipe_config_mismatch(bool fastset, const char *name, const char *format, ...)
+static void __printf(4, 5)
+pipe_config_mismatch(bool fastset, const struct intel_crtc *crtc,
+ const char *name, const char *format, ...)
{
struct va_format vaf;
va_list args;
@@ -12569,9 +12774,11 @@ pipe_config_mismatch(bool fastset, const char *name, const char *format, ...)
vaf.va = &args;
if (fastset)
- DRM_DEBUG_KMS("fastset mismatch in %s %pV\n", name, &vaf);
+ DRM_DEBUG_KMS("[CRTC:%d:%s] fastset mismatch in %s %pV\n",
+ crtc->base.base.id, crtc->base.name, name, &vaf);
else
- DRM_ERROR("mismatch in %s %pV\n", name, &vaf);
+ DRM_ERROR("[CRTC:%d:%s] mismatch in %s %pV\n",
+ crtc->base.base.id, crtc->base.name, name, &vaf);
va_end(args);
}
@@ -12599,6 +12806,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
bool fastset)
{
struct drm_i915_private *dev_priv = to_i915(current_config->base.crtc->dev);
+ struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc);
bool ret = true;
u32 bp_gamma = 0;
bool fixup_inherited = fastset &&
@@ -12612,8 +12820,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
#define PIPE_CONF_CHECK_X(name) do { \
if (current_config->name != pipe_config->name) { \
- pipe_config_mismatch(fastset, __stringify(name), \
- "(expected 0x%08x, found 0x%08x)\n", \
+ pipe_config_mismatch(fastset, crtc, __stringify(name), \
+ "(expected 0x%08x, found 0x%08x)", \
current_config->name, \
pipe_config->name); \
ret = false; \
@@ -12622,8 +12830,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
#define PIPE_CONF_CHECK_I(name) do { \
if (current_config->name != pipe_config->name) { \
- pipe_config_mismatch(fastset, __stringify(name), \
- "(expected %i, found %i)\n", \
+ pipe_config_mismatch(fastset, crtc, __stringify(name), \
+ "(expected %i, found %i)", \
current_config->name, \
pipe_config->name); \
ret = false; \
@@ -12632,8 +12840,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
#define PIPE_CONF_CHECK_BOOL(name) do { \
if (current_config->name != pipe_config->name) { \
- pipe_config_mismatch(fastset, __stringify(name), \
- "(expected %s, found %s)\n", \
+ pipe_config_mismatch(fastset, crtc, __stringify(name), \
+ "(expected %s, found %s)", \
yesno(current_config->name), \
yesno(pipe_config->name)); \
ret = false; \
@@ -12649,8 +12857,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
if (!fixup_inherited || (!current_config->name && !pipe_config->name)) { \
PIPE_CONF_CHECK_BOOL(name); \
} else { \
- pipe_config_mismatch(fastset, __stringify(name), \
- "unable to verify whether state matches exactly, forcing modeset (expected %s, found %s)\n", \
+ pipe_config_mismatch(fastset, crtc, __stringify(name), \
+ "unable to verify whether state matches exactly, forcing modeset (expected %s, found %s)", \
yesno(current_config->name), \
yesno(pipe_config->name)); \
ret = false; \
@@ -12659,8 +12867,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
#define PIPE_CONF_CHECK_P(name) do { \
if (current_config->name != pipe_config->name) { \
- pipe_config_mismatch(fastset, __stringify(name), \
- "(expected %p, found %p)\n", \
+ pipe_config_mismatch(fastset, crtc, __stringify(name), \
+ "(expected %p, found %p)", \
current_config->name, \
pipe_config->name); \
ret = false; \
@@ -12671,9 +12879,9 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
if (!intel_compare_link_m_n(&current_config->name, \
&pipe_config->name,\
!fastset)) { \
- pipe_config_mismatch(fastset, __stringify(name), \
+ pipe_config_mismatch(fastset, crtc, __stringify(name), \
"(expected tu %i gmch %i/%i link %i/%i, " \
- "found tu %i, gmch %i/%i link %i/%i)\n", \
+ "found tu %i, gmch %i/%i link %i/%i)", \
current_config->name.tu, \
current_config->name.gmch_m, \
current_config->name.gmch_n, \
@@ -12698,10 +12906,10 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
&pipe_config->name, !fastset) && \
!intel_compare_link_m_n(&current_config->alt_name, \
&pipe_config->name, !fastset)) { \
- pipe_config_mismatch(fastset, __stringify(name), \
+ pipe_config_mismatch(fastset, crtc, __stringify(name), \
"(expected tu %i gmch %i/%i link %i/%i, " \
"or tu %i gmch %i/%i link %i/%i, " \
- "found tu %i, gmch %i/%i link %i/%i)\n", \
+ "found tu %i, gmch %i/%i link %i/%i)", \
current_config->name.tu, \
current_config->name.gmch_m, \
current_config->name.gmch_n, \
@@ -12723,8 +12931,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
#define PIPE_CONF_CHECK_FLAGS(name, mask) do { \
if ((current_config->name ^ pipe_config->name) & (mask)) { \
- pipe_config_mismatch(fastset, __stringify(name), \
- "(%x) (expected %i, found %i)\n", \
+ pipe_config_mismatch(fastset, crtc, __stringify(name), \
+ "(%x) (expected %i, found %i)", \
(mask), \
current_config->name & (mask), \
pipe_config->name & (mask)); \
@@ -12734,8 +12942,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
#define PIPE_CONF_CHECK_CLOCK_FUZZY(name) do { \
if (!intel_fuzzy_clock_check(current_config->name, pipe_config->name)) { \
- pipe_config_mismatch(fastset, __stringify(name), \
- "(expected %i, found %i)\n", \
+ pipe_config_mismatch(fastset, crtc, __stringify(name), \
+ "(expected %i, found %i)", \
current_config->name, \
pipe_config->name); \
ret = false; \
@@ -12754,8 +12962,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
#define PIPE_CONF_CHECK_COLOR_LUT(name1, name2, bit_precision) do { \
if (current_config->name1 != pipe_config->name1) { \
- pipe_config_mismatch(fastset, __stringify(name1), \
- "(expected %i, found %i, won't compare lut values)\n", \
+ pipe_config_mismatch(fastset, crtc, __stringify(name1), \
+ "(expected %i, found %i, won't compare lut values)", \
current_config->name1, \
pipe_config->name1); \
ret = false;\
@@ -12763,8 +12971,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
if (!intel_color_lut_equal(current_config->name2, \
pipe_config->name2, pipe_config->name1, \
bit_precision)) { \
- pipe_config_mismatch(fastset, __stringify(name2), \
- "hw_state doesn't match sw_state\n"); \
+ pipe_config_mismatch(fastset, crtc, __stringify(name2), \
+ "hw_state doesn't match sw_state"); \
ret = false; \
} \
} \
@@ -12808,6 +13016,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
PIPE_CONF_CHECK_I(pixel_multiplier);
PIPE_CONF_CHECK_I(output_format);
+ PIPE_CONF_CHECK_I(dc3co_exitline);
PIPE_CONF_CHECK_BOOL(has_hdmi_sink);
if ((INTEL_GEN(dev_priv) < 8 && !IS_HASWELL(dev_priv)) ||
IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
@@ -12926,6 +13135,9 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
PIPE_CONF_CHECK_INFOFRAME(hdmi);
PIPE_CONF_CHECK_INFOFRAME(drm);
+ PIPE_CONF_CHECK_I(sync_mode_slaves_mask);
+ PIPE_CONF_CHECK_I(master_transcoder);
+
#undef PIPE_CONF_CHECK_X
#undef PIPE_CONF_CHECK_I
#undef PIPE_CONF_CHECK_BOOL
@@ -13361,10 +13573,15 @@ intel_modeset_verify_disabled(struct drm_i915_private *dev_priv,
verify_disabled_dpll_state(dev_priv);
}
-static void update_scanline_offset(const struct intel_crtc_state *crtc_state)
+static void
+intel_crtc_update_active_timings(const struct intel_crtc_state *crtc_state)
{
struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+ const struct drm_display_mode *adjusted_mode =
+ &crtc_state->base.adjusted_mode;
+
+ drm_calc_timestamping_constants(&crtc->base, adjusted_mode);
/*
* The scanline counter increments at the leading edge of hsync.
@@ -13394,7 +13611,6 @@ static void update_scanline_offset(const struct intel_crtc_state *crtc_state)
* answer that's slightly in the future.
*/
if (IS_GEN(dev_priv, 2)) {
- const struct drm_display_mode *adjusted_mode = &crtc_state->base.adjusted_mode;
int vtotal;
vtotal = adjusted_mode->crtc_vtotal;
@@ -13405,8 +13621,9 @@ static void update_scanline_offset(const struct intel_crtc_state *crtc_state)
} else if (HAS_DDI(dev_priv) &&
intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) {
crtc->scanline_offset = 2;
- } else
+ } else {
crtc->scanline_offset = 1;
+ }
}
static void intel_modeset_clear_plls(struct intel_atomic_state *state)
@@ -13573,6 +13790,42 @@ static void intel_crtc_check_fastset(const struct intel_crtc_state *old_crtc_sta
new_crtc_state->has_drrs = old_crtc_state->has_drrs;
}
+static int intel_atomic_check_planes(struct intel_atomic_state *state)
+{
+ struct intel_plane_state *plane_state;
+ struct intel_plane *plane;
+ int i, ret;
+
+ for_each_new_intel_plane_in_state(state, plane, plane_state, i) {
+ ret = intel_plane_atomic_check(state, plane);
+ if (ret) {
+ DRM_DEBUG_ATOMIC("[PLANE:%d:%s] atomic driver check failed\n",
+ plane->base.base.id, plane->base.name);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int intel_atomic_check_crtcs(struct intel_atomic_state *state)
+{
+ struct intel_crtc_state *crtc_state;
+ struct intel_crtc *crtc;
+ int i;
+
+ for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) {
+ int ret = intel_crtc_atomic_check(state, crtc);
+ if (ret) {
+ DRM_DEBUG_ATOMIC("[CRTC:%d:%s] atomic driver check failed\n",
+ crtc->base.base.id, crtc->base.name);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
/**
* intel_atomic_check - validate state object
* @dev: drm device
@@ -13636,7 +13889,11 @@ static int intel_atomic_check(struct drm_device *dev,
if (ret)
goto fail;
- ret = drm_atomic_helper_check_planes(dev, &state->base);
+ ret = intel_atomic_check_planes(state);
+ if (ret)
+ goto fail;
+
+ ret = intel_atomic_check_crtcs(state);
if (ret)
goto fail;
@@ -13694,20 +13951,103 @@ u32 intel_crtc_get_vblank_counter(struct intel_crtc *crtc)
return crtc->base.funcs->get_vblank_counter(&crtc->base);
}
+void intel_crtc_arm_fifo_underrun(struct intel_crtc *crtc,
+ struct intel_crtc_state *crtc_state)
+{
+ struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+
+ if (!IS_GEN(dev_priv, 2))
+ intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, true);
+
+ if (crtc_state->has_pch_encoder) {
+ enum pipe pch_transcoder =
+ intel_crtc_pch_transcoder(crtc);
+
+ intel_set_pch_fifo_underrun_reporting(dev_priv, pch_transcoder, true);
+ }
+}
+
+static void intel_pipe_fastset(const struct intel_crtc_state *old_crtc_state,
+ const struct intel_crtc_state *new_crtc_state)
+{
+ struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
+ struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+
+ /* drm_atomic_helper_update_legacy_modeset_state might not be called. */
+ crtc->base.mode = new_crtc_state->base.mode;
+
+ /*
+ * Update pipe size and adjust fitter if needed: the reason for this is
+ * that in compute_mode_changes we check the native mode (not the pfit
+ * mode) to see if we can flip rather than do a full mode set. In the
+ * fastboot case, we'll flip, but if we don't update the pipesrc and
+ * pfit state, we'll end up with a big fb scanned out into the wrong
+ * sized surface.
+ */
+ intel_set_pipe_src_size(new_crtc_state);
+
+ /* on skylake this is done by detaching scalers */
+ if (INTEL_GEN(dev_priv) >= 9) {
+ skl_detach_scalers(new_crtc_state);
+
+ if (new_crtc_state->pch_pfit.enabled)
+ skylake_pfit_enable(new_crtc_state);
+ } else if (HAS_PCH_SPLIT(dev_priv)) {
+ if (new_crtc_state->pch_pfit.enabled)
+ ironlake_pfit_enable(new_crtc_state);
+ else if (old_crtc_state->pch_pfit.enabled)
+ ironlake_pfit_disable(old_crtc_state);
+ }
+
+ if (INTEL_GEN(dev_priv) >= 11)
+ icl_set_pipe_chicken(crtc);
+}
+
+static void commit_pipe_config(struct intel_atomic_state *state,
+ struct intel_crtc_state *old_crtc_state,
+ struct intel_crtc_state *new_crtc_state)
+{
+ struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+ bool modeset = needs_modeset(new_crtc_state);
+
+ /*
+ * During modesets pipe configuration was programmed as the
+ * CRTC was enabled.
+ */
+ if (!modeset) {
+ if (new_crtc_state->base.color_mgmt_changed ||
+ new_crtc_state->update_pipe)
+ intel_color_commit(new_crtc_state);
+
+ if (INTEL_GEN(dev_priv) >= 9)
+ skl_detach_scalers(new_crtc_state);
+
+ if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv))
+ bdw_set_pipemisc(new_crtc_state);
+
+ if (new_crtc_state->update_pipe)
+ intel_pipe_fastset(old_crtc_state, new_crtc_state);
+ }
+
+ if (dev_priv->display.atomic_update_watermarks)
+ dev_priv->display.atomic_update_watermarks(state,
+ new_crtc_state);
+}
+
static void intel_update_crtc(struct intel_crtc *crtc,
struct intel_atomic_state *state,
struct intel_crtc_state *old_crtc_state,
struct intel_crtc_state *new_crtc_state)
{
- struct drm_device *dev = state->base.dev;
- struct drm_i915_private *dev_priv = to_i915(dev);
+ struct drm_i915_private *dev_priv = to_i915(state->base.dev);
bool modeset = needs_modeset(new_crtc_state);
struct intel_plane_state *new_plane_state =
intel_atomic_get_new_plane_state(state,
to_intel_plane(crtc->base.primary));
if (modeset) {
- update_scanline_offset(new_crtc_state);
+ intel_crtc_update_active_timings(new_crtc_state);
+
dev_priv->display.crtc_enable(new_crtc_state, state);
/* vblanks work again, re-enable pipe CRC. */
@@ -13724,14 +14064,39 @@ static void intel_update_crtc(struct intel_crtc *crtc,
else if (new_plane_state)
intel_fbc_enable(crtc, new_crtc_state, new_plane_state);
- intel_begin_crtc_commit(state, crtc);
+ /* Perform vblank evasion around commit operation */
+ intel_pipe_update_start(new_crtc_state);
+
+ commit_pipe_config(state, old_crtc_state, new_crtc_state);
if (INTEL_GEN(dev_priv) >= 9)
skl_update_planes_on_crtc(state, crtc);
else
i9xx_update_planes_on_crtc(state, crtc);
- intel_finish_crtc_commit(state, crtc);
+ intel_pipe_update_end(new_crtc_state);
+
+ /*
+ * We usually enable FIFO underrun interrupts as part of the
+ * CRTC enable sequence during modesets. But when we inherit a
+ * valid pipe configuration from the BIOS we need to take care
+ * of enabling them on the CRTC's first fastset.
+ */
+ if (new_crtc_state->update_pipe && !modeset &&
+ old_crtc_state->base.mode.private_flags & I915_MODE_FLAG_INHERITED)
+ intel_crtc_arm_fifo_underrun(crtc, new_crtc_state);
+}
+
+static struct intel_crtc *intel_get_slave_crtc(const struct intel_crtc_state *new_crtc_state)
+{
+ struct drm_i915_private *dev_priv = to_i915(new_crtc_state->base.crtc->dev);
+ enum transcoder slave_transcoder;
+
+ WARN_ON(!is_power_of_2(new_crtc_state->sync_mode_slaves_mask));
+
+ slave_transcoder = ffs(new_crtc_state->sync_mode_slaves_mask) - 1;
+ return intel_get_crtc_for_pipe(dev_priv,
+ (enum pipe)slave_transcoder);
}
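intel_get_slave_crtc() above relies on the fixed 1:1 transcoder-to-pipe mapping (TRANSCODER_A == PIPE_A and so on) and, via the is_power_of_2() check, on the current one-slave-per-master limitation. For illustration only, the reverse lookup a slave could use to find its master under the same mapping assumption (not part of the patch):

static struct intel_crtc *
example_get_master_crtc(struct drm_i915_private *dev_priv,
			const struct intel_crtc_state *slave_crtc_state)
{
	enum transcoder master = slave_crtc_state->master_transcoder;

	if (WARN_ON(master == INVALID_TRANSCODER))
		return NULL;

	/* Same fixed transcoder -> pipe mapping as intel_get_slave_crtc(). */
	return intel_get_crtc_for_pipe(dev_priv, (enum pipe)master);
}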
static void intel_old_crtc_state_disables(struct intel_atomic_state *state,
@@ -13769,6 +14134,37 @@ static void intel_old_crtc_state_disables(struct intel_atomic_state *state,
new_crtc_state);
}
+static void intel_trans_port_sync_modeset_disables(struct intel_atomic_state *state,
+ struct intel_crtc *crtc,
+ struct intel_crtc_state *old_crtc_state,
+ struct intel_crtc_state *new_crtc_state)
+{
+ struct intel_crtc *slave_crtc = intel_get_slave_crtc(new_crtc_state);
+ struct intel_crtc_state *new_slave_crtc_state =
+ intel_atomic_get_new_crtc_state(state, slave_crtc);
+ struct intel_crtc_state *old_slave_crtc_state =
+ intel_atomic_get_old_crtc_state(state, slave_crtc);
+
+ WARN_ON(!slave_crtc || !new_slave_crtc_state ||
+ !old_slave_crtc_state);
+
+ /* Disable Slave first */
+ intel_pre_plane_update(old_slave_crtc_state, new_slave_crtc_state);
+ if (old_slave_crtc_state->base.active)
+ intel_old_crtc_state_disables(state,
+ old_slave_crtc_state,
+ new_slave_crtc_state,
+ slave_crtc);
+
+ /* Disable Master */
+ intel_pre_plane_update(old_crtc_state, new_crtc_state);
+ if (old_crtc_state->base.active)
+ intel_old_crtc_state_disables(state,
+ old_crtc_state,
+ new_crtc_state,
+ crtc);
+}
+
static void intel_commit_modeset_disables(struct intel_atomic_state *state)
{
struct intel_crtc_state *new_crtc_state, *old_crtc_state;
@@ -13787,13 +14183,28 @@ static void intel_commit_modeset_disables(struct intel_atomic_state *state)
if (!needs_modeset(new_crtc_state))
continue;
- intel_pre_plane_update(old_crtc_state, new_crtc_state);
+ /* In case of Transcoder Port Sync, master and slave CRTCs can be
+ * assigned in any order and we need to make sure that
+ * slave CRTCs are disabled first and the master CRTC after, since
+ * slave vblanks are masked until the master's vblank.
+ */
+ if (is_trans_port_sync_mode(new_crtc_state)) {
+ if (is_trans_port_sync_master(new_crtc_state))
+ intel_trans_port_sync_modeset_disables(state,
+ crtc,
+ old_crtc_state,
+ new_crtc_state);
+ else
+ continue;
+ } else {
+ intel_pre_plane_update(old_crtc_state, new_crtc_state);
- if (old_crtc_state->base.active)
- intel_old_crtc_state_disables(state,
- old_crtc_state,
- new_crtc_state,
- crtc);
+ if (old_crtc_state->base.active)
+ intel_old_crtc_state_disables(state,
+ old_crtc_state,
+ new_crtc_state,
+ crtc);
+ }
}
}
@@ -13812,6 +14223,113 @@ static void intel_commit_modeset_enables(struct intel_atomic_state *state)
}
}
+static void intel_crtc_enable_trans_port_sync(struct intel_crtc *crtc,
+ struct intel_atomic_state *state,
+ struct intel_crtc_state *new_crtc_state)
+{
+ struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+
+ intel_crtc_update_active_timings(new_crtc_state);
+ dev_priv->display.crtc_enable(new_crtc_state, state);
+ intel_crtc_enable_pipe_crc(crtc);
+}
+
+static void intel_set_dp_tp_ctl_normal(struct intel_crtc *crtc,
+ struct intel_atomic_state *state)
+{
+ struct drm_connector_state *conn_state;
+ struct drm_connector *conn;
+ struct intel_dp *intel_dp;
+ int i;
+
+ for_each_new_connector_in_state(&state->base, conn, conn_state, i) {
+ if (conn_state->crtc == &crtc->base)
+ break;
+ }
+ intel_dp = enc_to_intel_dp(&intel_attached_encoder(conn)->base);
+ intel_dp_stop_link_train(intel_dp);
+}
+
+static void intel_post_crtc_enable_updates(struct intel_crtc *crtc,
+ struct intel_atomic_state *state)
+{
+ struct intel_crtc_state *new_crtc_state =
+ intel_atomic_get_new_crtc_state(state, crtc);
+ struct intel_crtc_state *old_crtc_state =
+ intel_atomic_get_old_crtc_state(state, crtc);
+ struct intel_plane_state *new_plane_state =
+ intel_atomic_get_new_plane_state(state,
+ to_intel_plane(crtc->base.primary));
+ bool modeset = needs_modeset(new_crtc_state);
+
+ if (new_crtc_state->update_pipe && !new_crtc_state->enable_fbc)
+ intel_fbc_disable(crtc);
+ else if (new_plane_state)
+ intel_fbc_enable(crtc, new_crtc_state, new_plane_state);
+
+ /* Perform vblank evasion around commit operation */
+ intel_pipe_update_start(new_crtc_state);
+ commit_pipe_config(state, old_crtc_state, new_crtc_state);
+ skl_update_planes_on_crtc(state, crtc);
+ intel_pipe_update_end(new_crtc_state);
+
+ /*
+ * We usually enable FIFO underrun interrupts as part of the
+ * CRTC enable sequence during modesets. But when we inherit a
+ * valid pipe configuration from the BIOS we need to take care
+ * of enabling them on the CRTC's first fastset.
+ */
+ if (new_crtc_state->update_pipe && !modeset &&
+ old_crtc_state->base.mode.private_flags & I915_MODE_FLAG_INHERITED)
+ intel_crtc_arm_fifo_underrun(crtc, new_crtc_state);
+}
+
+static void intel_update_trans_port_sync_crtcs(struct intel_crtc *crtc,
+ struct intel_atomic_state *state,
+ struct intel_crtc_state *old_crtc_state,
+ struct intel_crtc_state *new_crtc_state)
+{
+ struct intel_crtc *slave_crtc = intel_get_slave_crtc(new_crtc_state);
+ struct intel_crtc_state *new_slave_crtc_state =
+ intel_atomic_get_new_crtc_state(state, slave_crtc);
+ struct intel_crtc_state *old_slave_crtc_state =
+ intel_atomic_get_old_crtc_state(state, slave_crtc);
+
+ WARN_ON(!slave_crtc || !new_slave_crtc_state ||
+ !old_slave_crtc_state);
+
+ DRM_DEBUG_KMS("Updating Transcoder Port Sync Master CRTC = %d %s and Slave CRTC %d %s\n",
+ crtc->base.base.id, crtc->base.name, slave_crtc->base.base.id,
+ slave_crtc->base.name);
+
+ /* Enable seq for slave with DP_TP_CTL left Idle until the
+ * master is ready
+ */
+ intel_crtc_enable_trans_port_sync(slave_crtc,
+ state,
+ new_slave_crtc_state);
+
+ /* Enable seq for master with DP_TP_CTL left Idle */
+ intel_crtc_enable_trans_port_sync(crtc,
+ state,
+ new_crtc_state);
+
+ /* Set Slave's DP_TP_CTL to Normal */
+ intel_set_dp_tp_ctl_normal(slave_crtc,
+ state);
+
+ /* Set Master's DP_TP_CTL To Normal */
+ usleep_range(200, 400);
+ intel_set_dp_tp_ctl_normal(crtc,
+ state);
+
+ /* Now do the post crtc enable for all master and slaves */
+ intel_post_crtc_enable_updates(slave_crtc,
+ state);
+ intel_post_crtc_enable_updates(crtc,
+ state);
+}
+
static void skl_commit_modeset_enables(struct intel_atomic_state *state)
{
struct drm_i915_private *dev_priv = to_i915(state->base.dev);
@@ -13819,7 +14337,6 @@ static void skl_commit_modeset_enables(struct intel_atomic_state *state)
struct intel_crtc_state *old_crtc_state, *new_crtc_state;
unsigned int updated = 0;
bool progress;
- enum pipe pipe;
int i;
u8 hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices;
u8 required_slices = state->wm_results.ddb.enabled_slices;
@@ -13844,12 +14361,11 @@ static void skl_commit_modeset_enables(struct intel_atomic_state *state)
progress = false;
for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
+ enum pipe pipe = crtc->pipe;
bool vbl_wait = false;
- unsigned int cmask = drm_crtc_mask(&crtc->base);
-
- pipe = crtc->pipe;
+ bool modeset = needs_modeset(new_crtc_state);
- if (updated & cmask || !new_crtc_state->base.active)
+ if (updated & BIT(crtc->pipe) || !new_crtc_state->base.active)
continue;
if (skl_ddb_allocation_overlaps(&new_crtc_state->wm.skl.ddb,
@@ -13857,7 +14373,7 @@ static void skl_commit_modeset_enables(struct intel_atomic_state *state)
INTEL_NUM_PIPES(dev_priv), i))
continue;
- updated |= cmask;
+ updated |= BIT(pipe);
entries[i] = new_crtc_state->wm.skl.ddb;
/*
@@ -13868,12 +14384,22 @@ static void skl_commit_modeset_enables(struct intel_atomic_state *state)
*/
if (!skl_ddb_entry_equal(&new_crtc_state->wm.skl.ddb,
&old_crtc_state->wm.skl.ddb) &&
- !new_crtc_state->base.active_changed &&
+ !modeset &&
state->wm_results.dirty_pipes != updated)
vbl_wait = true;
- intel_update_crtc(crtc, state, old_crtc_state,
- new_crtc_state);
+ if (modeset && is_trans_port_sync_mode(new_crtc_state)) {
+ if (is_trans_port_sync_master(new_crtc_state))
+ intel_update_trans_port_sync_crtcs(crtc,
+ state,
+ old_crtc_state,
+ new_crtc_state);
+ else
+ continue;
+ } else {
+ intel_update_crtc(crtc, state, old_crtc_state,
+ new_crtc_state);
+ }
if (vbl_wait)
intel_wait_for_vblank(dev_priv, pipe);
@@ -14346,7 +14872,7 @@ static void fb_obj_bump_render_priority(struct drm_i915_gem_object *obj)
/**
* intel_prepare_plane_fb - Prepare fb for usage on plane
* @plane: drm plane to prepare for
- * @new_state: the plane state being prepared
+ * @_new_plane_state: the plane state being prepared
*
* Prepares a framebuffer for usage on a display plane. Generally this
* involves pinning the underlying object and updating the frontbuffer tracking
@@ -14357,12 +14883,14 @@ static void fb_obj_bump_render_priority(struct drm_i915_gem_object *obj)
*/
int
intel_prepare_plane_fb(struct drm_plane *plane,
- struct drm_plane_state *new_state)
+ struct drm_plane_state *_new_plane_state)
{
+ struct intel_plane_state *new_plane_state =
+ to_intel_plane_state(_new_plane_state);
struct intel_atomic_state *intel_state =
- to_intel_atomic_state(new_state->state);
+ to_intel_atomic_state(new_plane_state->base.state);
struct drm_i915_private *dev_priv = to_i915(plane->dev);
- struct drm_framebuffer *fb = new_state->fb;
+ struct drm_framebuffer *fb = new_plane_state->base.fb;
struct drm_i915_gem_object *obj = intel_fb_obj(fb);
struct drm_i915_gem_object *old_obj = intel_fb_obj(plane->state->fb);
int ret;
@@ -14393,9 +14921,9 @@ intel_prepare_plane_fb(struct drm_plane *plane,
}
}
- if (new_state->fence) { /* explicit fencing */
+ if (new_plane_state->base.fence) { /* explicit fencing */
ret = i915_sw_fence_await_dma_fence(&intel_state->commit_ready,
- new_state->fence,
+ new_plane_state->base.fence,
I915_FENCE_TIMEOUT,
GFP_KERNEL);
if (ret < 0)
@@ -14409,7 +14937,7 @@ intel_prepare_plane_fb(struct drm_plane *plane,
if (ret)
return ret;
- ret = intel_plane_pin_fb(to_intel_plane_state(new_state));
+ ret = intel_plane_pin_fb(new_plane_state);
i915_gem_object_unpin_pages(obj);
if (ret)
@@ -14418,7 +14946,7 @@ intel_prepare_plane_fb(struct drm_plane *plane,
fb_obj_bump_render_priority(obj);
intel_frontbuffer_flush(obj->frontbuffer, ORIGIN_DIRTYFB);
- if (!new_state->fence) { /* implicit fencing */
+ if (!new_plane_state->base.fence) { /* implicit fencing */
struct dma_fence *fence;
ret = i915_sw_fence_await_reservation(&intel_state->commit_ready,
@@ -14430,11 +14958,13 @@ intel_prepare_plane_fb(struct drm_plane *plane,
fence = dma_resv_get_excl_rcu(obj->base.resv);
if (fence) {
- add_rps_boost_after_vblank(new_state->crtc, fence);
+ add_rps_boost_after_vblank(new_plane_state->base.crtc,
+ fence);
dma_fence_put(fence);
}
} else {
- add_rps_boost_after_vblank(new_state->crtc, new_state->fence);
+ add_rps_boost_after_vblank(new_plane_state->base.crtc,
+ new_plane_state->base.fence);
}
/*
@@ -14456,16 +14986,18 @@ intel_prepare_plane_fb(struct drm_plane *plane,
/**
* intel_cleanup_plane_fb - Cleans up an fb after plane use
* @plane: drm plane to clean up for
- * @old_state: the state from the previous modeset
+ * @_old_plane_state: the state from the previous modeset
*
* Cleans up a framebuffer that has just been removed from a plane.
*/
void
intel_cleanup_plane_fb(struct drm_plane *plane,
- struct drm_plane_state *old_state)
+ struct drm_plane_state *_old_plane_state)
{
+ struct intel_plane_state *old_plane_state =
+ to_intel_plane_state(_old_plane_state);
struct intel_atomic_state *intel_state =
- to_intel_atomic_state(old_state->state);
+ to_intel_atomic_state(old_plane_state->base.state);
struct drm_i915_private *dev_priv = to_i915(plane->dev);
if (intel_state->rps_interactive) {
@@ -14474,7 +15006,7 @@ intel_cleanup_plane_fb(struct drm_plane *plane,
}
/* Should only be called after a successful intel_prepare_plane_fb()! */
- intel_plane_unpin_fb(to_intel_plane_state(old_state));
+ intel_plane_unpin_fb(old_plane_state);
}
int
@@ -14515,72 +15047,6 @@ skl_max_scale(const struct intel_crtc_state *crtc_state,
return max_scale;
}
-static void intel_begin_crtc_commit(struct intel_atomic_state *state,
- struct intel_crtc *crtc)
-{
- struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
- struct intel_crtc_state *old_crtc_state =
- intel_atomic_get_old_crtc_state(state, crtc);
- struct intel_crtc_state *new_crtc_state =
- intel_atomic_get_new_crtc_state(state, crtc);
- bool modeset = needs_modeset(new_crtc_state);
-
- /* Perform vblank evasion around commit operation */
- intel_pipe_update_start(new_crtc_state);
-
- if (modeset)
- goto out;
-
- if (new_crtc_state->base.color_mgmt_changed ||
- new_crtc_state->update_pipe)
- intel_color_commit(new_crtc_state);
-
- if (new_crtc_state->update_pipe)
- intel_update_pipe_config(old_crtc_state, new_crtc_state);
- else if (INTEL_GEN(dev_priv) >= 9)
- skl_detach_scalers(new_crtc_state);
-
- if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv))
- bdw_set_pipemisc(new_crtc_state);
-
-out:
- if (dev_priv->display.atomic_update_watermarks)
- dev_priv->display.atomic_update_watermarks(state,
- new_crtc_state);
-}
-
-void intel_crtc_arm_fifo_underrun(struct intel_crtc *crtc,
- struct intel_crtc_state *crtc_state)
-{
- struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-
- if (!IS_GEN(dev_priv, 2))
- intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, true);
-
- if (crtc_state->has_pch_encoder) {
- enum pipe pch_transcoder =
- intel_crtc_pch_transcoder(crtc);
-
- intel_set_pch_fifo_underrun_reporting(dev_priv, pch_transcoder, true);
- }
-}
-
-static void intel_finish_crtc_commit(struct intel_atomic_state *state,
- struct intel_crtc *crtc)
-{
- struct intel_crtc_state *old_crtc_state =
- intel_atomic_get_old_crtc_state(state, crtc);
- struct intel_crtc_state *new_crtc_state =
- intel_atomic_get_new_crtc_state(state, crtc);
-
- intel_pipe_update_end(new_crtc_state);
-
- if (new_crtc_state->update_pipe &&
- !needs_modeset(new_crtc_state) &&
- old_crtc_state->base.mode.private_flags & I915_MODE_FLAG_INHERITED)
- intel_crtc_arm_fifo_underrun(crtc, new_crtc_state);
-}
-
/**
* intel_plane_destroy - destroy a plane
* @plane: plane to destroy
@@ -14668,8 +15134,8 @@ static const struct drm_plane_funcs i8xx_plane_funcs = {
};
static int
-intel_legacy_cursor_update(struct drm_plane *plane,
- struct drm_crtc *crtc,
+intel_legacy_cursor_update(struct drm_plane *_plane,
+ struct drm_crtc *_crtc,
struct drm_framebuffer *fb,
int crtc_x, int crtc_y,
unsigned int crtc_w, unsigned int crtc_h,
@@ -14677,10 +15143,13 @@ intel_legacy_cursor_update(struct drm_plane *plane,
u32 src_w, u32 src_h,
struct drm_modeset_acquire_ctx *ctx)
{
- struct drm_plane_state *old_plane_state, *new_plane_state;
- struct intel_plane *intel_plane = to_intel_plane(plane);
+ struct intel_plane *plane = to_intel_plane(_plane);
+ struct intel_crtc *crtc = to_intel_crtc(_crtc);
+ struct intel_plane_state *old_plane_state =
+ to_intel_plane_state(plane->base.state);
+ struct intel_plane_state *new_plane_state;
struct intel_crtc_state *crtc_state =
- to_intel_crtc_state(crtc->state);
+ to_intel_crtc_state(crtc->base.state);
struct intel_crtc_state *new_crtc_state;
int ret;
@@ -14692,14 +15161,13 @@ intel_legacy_cursor_update(struct drm_plane *plane,
crtc_state->update_pipe)
goto slow;
- old_plane_state = plane->state;
/*
* Don't do an async update if there is an outstanding commit modifying
* the plane. This prevents our async update's changes from getting
* overridden by a previous synchronous update's state.
*/
- if (old_plane_state->commit &&
- !try_wait_for_completion(&old_plane_state->commit->hw_done))
+ if (old_plane_state->base.commit &&
+ !try_wait_for_completion(&old_plane_state->base.commit->hw_done))
goto slow;
/*
@@ -14707,52 +15175,51 @@ intel_legacy_cursor_update(struct drm_plane *plane,
* take the slowpath. Only changing fb or position should be
* in the fastpath.
*/
- if (old_plane_state->crtc != crtc ||
- old_plane_state->src_w != src_w ||
- old_plane_state->src_h != src_h ||
- old_plane_state->crtc_w != crtc_w ||
- old_plane_state->crtc_h != crtc_h ||
- !old_plane_state->fb != !fb)
+ if (old_plane_state->base.crtc != &crtc->base ||
+ old_plane_state->base.src_w != src_w ||
+ old_plane_state->base.src_h != src_h ||
+ old_plane_state->base.crtc_w != crtc_w ||
+ old_plane_state->base.crtc_h != crtc_h ||
+ !old_plane_state->base.fb != !fb)
goto slow;
- new_plane_state = intel_plane_duplicate_state(plane);
+ new_plane_state = to_intel_plane_state(intel_plane_duplicate_state(&plane->base));
if (!new_plane_state)
return -ENOMEM;
- new_crtc_state = to_intel_crtc_state(intel_crtc_duplicate_state(crtc));
+ new_crtc_state = to_intel_crtc_state(intel_crtc_duplicate_state(&crtc->base));
if (!new_crtc_state) {
ret = -ENOMEM;
goto out_free;
}
- drm_atomic_set_fb_for_plane(new_plane_state, fb);
+ drm_atomic_set_fb_for_plane(&new_plane_state->base, fb);
- new_plane_state->src_x = src_x;
- new_plane_state->src_y = src_y;
- new_plane_state->src_w = src_w;
- new_plane_state->src_h = src_h;
- new_plane_state->crtc_x = crtc_x;
- new_plane_state->crtc_y = crtc_y;
- new_plane_state->crtc_w = crtc_w;
- new_plane_state->crtc_h = crtc_h;
+ new_plane_state->base.src_x = src_x;
+ new_plane_state->base.src_y = src_y;
+ new_plane_state->base.src_w = src_w;
+ new_plane_state->base.src_h = src_h;
+ new_plane_state->base.crtc_x = crtc_x;
+ new_plane_state->base.crtc_y = crtc_y;
+ new_plane_state->base.crtc_w = crtc_w;
+ new_plane_state->base.crtc_h = crtc_h;
ret = intel_plane_atomic_check_with_state(crtc_state, new_crtc_state,
- to_intel_plane_state(old_plane_state),
- to_intel_plane_state(new_plane_state));
+ old_plane_state, new_plane_state);
if (ret)
goto out_free;
- ret = intel_plane_pin_fb(to_intel_plane_state(new_plane_state));
+ ret = intel_plane_pin_fb(new_plane_state);
if (ret)
goto out_free;
- intel_frontbuffer_flush(to_intel_frontbuffer(fb), ORIGIN_FLIP);
- intel_frontbuffer_track(to_intel_frontbuffer(old_plane_state->fb),
- to_intel_frontbuffer(fb),
- intel_plane->frontbuffer_bit);
+ intel_frontbuffer_flush(to_intel_frontbuffer(new_plane_state->base.fb), ORIGIN_FLIP);
+ intel_frontbuffer_track(to_intel_frontbuffer(old_plane_state->base.fb),
+ to_intel_frontbuffer(new_plane_state->base.fb),
+ plane->frontbuffer_bit);
/* Swap plane state */
- plane->state = new_plane_state;
+ plane->base.state = &new_plane_state->base;
/*
* We cannot swap crtc_state as it may be in use by an atomic commit or
@@ -14766,25 +15233,24 @@ intel_legacy_cursor_update(struct drm_plane *plane,
*/
crtc_state->active_planes = new_crtc_state->active_planes;
- if (plane->state->visible)
- intel_update_plane(intel_plane, crtc_state,
- to_intel_plane_state(plane->state));
+ if (new_plane_state->base.visible)
+ intel_update_plane(plane, crtc_state, new_plane_state);
else
- intel_disable_plane(intel_plane, crtc_state);
+ intel_disable_plane(plane, crtc_state);
- intel_plane_unpin_fb(to_intel_plane_state(old_plane_state));
+ intel_plane_unpin_fb(old_plane_state);
out_free:
if (new_crtc_state)
- intel_crtc_destroy_state(crtc, &new_crtc_state->base);
+ intel_crtc_destroy_state(&crtc->base, &new_crtc_state->base);
if (ret)
- intel_plane_destroy_state(plane, new_plane_state);
+ intel_plane_destroy_state(&plane->base, &new_plane_state->base);
else
- intel_plane_destroy_state(plane, old_plane_state);
+ intel_plane_destroy_state(&plane->base, &old_plane_state->base);
return ret;
slow:
- return drm_atomic_helper_update_plane(plane, crtc, fb,
+ return drm_atomic_helper_update_plane(&plane->base, &crtc->base, fb,
crtc_x, crtc_y, crtc_w, crtc_h,
src_x, src_y, src_w, src_h, ctx);
}
@@ -15172,8 +15638,6 @@ static int intel_crtc_init(struct drm_i915_private *dev_priv, enum pipe pipe)
dev_priv->plane_to_crtc_mapping[i9xx_plane] = intel_crtc;
}
- drm_crtc_helper_add(&intel_crtc->base, &intel_helper_funcs);
-
intel_color_init(intel_crtc);
WARN_ON(drm_crtc_index(&intel_crtc->base) != intel_crtc->pipe);
@@ -15208,21 +15672,18 @@ int intel_get_pipe_from_crtc_id_ioctl(struct drm_device *dev, void *data,
return 0;
}
-static int intel_encoder_clones(struct intel_encoder *encoder)
+static u32 intel_encoder_possible_clones(struct intel_encoder *encoder)
{
struct drm_device *dev = encoder->base.dev;
struct intel_encoder *source_encoder;
- int index_mask = 0;
- int entry = 0;
+ u32 possible_clones = 0;
for_each_intel_encoder(dev, source_encoder) {
if (encoders_cloneable(encoder, source_encoder))
- index_mask |= (1 << entry);
-
- entry++;
+ possible_clones |= drm_encoder_mask(&source_encoder->base);
}
- return index_mask;
+ return possible_clones;
}
static u32 intel_encoder_possible_crtcs(struct intel_encoder *encoder)
@@ -15542,7 +16003,7 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv)
encoder->base.possible_crtcs =
intel_encoder_possible_crtcs(encoder);
encoder->base.possible_clones =
- intel_encoder_clones(encoder);
+ intel_encoder_possible_clones(encoder);
}
intel_init_pch_refclk(dev_priv);
@@ -16723,24 +17184,28 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
drm_connector_list_iter_begin(dev, &conn_iter);
for_each_intel_connector_iter(connector, &conn_iter) {
if (connector->get_hw_state(connector)) {
+ struct intel_crtc_state *crtc_state;
+ struct intel_crtc *crtc;
+
connector->base.dpms = DRM_MODE_DPMS_ON;
encoder = connector->encoder;
connector->base.encoder = &encoder->base;
- if (encoder->base.crtc &&
- encoder->base.crtc->state->active) {
+ crtc = to_intel_crtc(encoder->base.crtc);
+ crtc_state = crtc ? to_intel_crtc_state(crtc->base.state) : NULL;
+
+ if (crtc_state && crtc_state->base.active) {
/*
* This has to be done during hardware readout
* because anything calling .crtc_disable may
* rely on the connector_mask being accurate.
*/
- encoder->base.crtc->state->connector_mask |=
+ crtc_state->base.connector_mask |=
drm_connector_mask(&connector->base);
- encoder->base.crtc->state->encoder_mask |=
+ crtc_state->base.encoder_mask |=
drm_encoder_mask(&encoder->base);
}
-
} else {
connector->base.dpms = DRM_MODE_DPMS_OFF;
connector->base.encoder = NULL;
@@ -16765,7 +17230,7 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
crtc->base.mode.hdisplay = crtc_state->pipe_src_w;
crtc->base.mode.vdisplay = crtc_state->pipe_src_h;
intel_mode_from_pipe_config(&crtc_state->base.adjusted_mode, crtc_state);
- WARN_ON(drm_atomic_set_mode_for_crtc(crtc->base.state, &crtc->base.mode));
+ WARN_ON(drm_atomic_set_mode_for_crtc(&crtc_state->base, &crtc->base.mode));
/*
* The initial mode needs to be set in order to keep
@@ -16780,15 +17245,11 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
intel_crtc_compute_pixel_rate(crtc_state);
- if (dev_priv->display.modeset_calc_cdclk) {
- min_cdclk = intel_crtc_compute_min_cdclk(crtc_state);
- if (WARN_ON(min_cdclk < 0))
- min_cdclk = 0;
- }
+ min_cdclk = intel_crtc_compute_min_cdclk(crtc_state);
+ if (WARN_ON(min_cdclk < 0))
+ min_cdclk = 0;
- drm_calc_timestamping_constants(&crtc->base,
- &crtc_state->base.adjusted_mode);
- update_scanline_offset(crtc_state);
+ intel_crtc_update_active_timings(crtc_state);
}
dev_priv->min_cdclk[crtc->pipe] = min_cdclk;
diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h
index 2782f23ee887..7dcb176d91b0 100644
--- a/drivers/gpu/drm/i915/display/intel_display.h
+++ b/drivers/gpu/drm/i915/display/intel_display.h
@@ -27,6 +27,7 @@
#include <drm/drm_util.h>
#include <drm/i915_drm.h>
+#include "intel_dp_link_training.h"
enum link_m_n_set;
struct dpll;
@@ -54,6 +55,7 @@ struct intel_plane;
struct intel_plane_state;
struct intel_remapped_info;
struct intel_rotation_info;
+struct intel_crtc_state;
enum i915_gpio {
GPIOA,
@@ -93,6 +95,7 @@ enum pipe {
#define pipe_name(p) ((p) + 'A')
enum transcoder {
+ INVALID_TRANSCODER = -1,
/*
* The following transcoders have a 1:1 transcoder -> pipe mapping,
* keep their values fixed: the code assumes that TRANSCODER_A=0, the
@@ -271,6 +274,7 @@ enum aux_ch {
AUX_CH_D,
AUX_CH_E, /* ICL+ */
AUX_CH_F,
+ AUX_CH_G,
};
#define aux_ch_name(a) ((a) + 'A')
@@ -350,7 +354,7 @@ enum phy_fia {
&(dev)->mode_config.plane_list, \
base.head) \
for_each_if((plane_mask) & \
- drm_plane_mask(&intel_plane->base)))
+ drm_plane_mask(&intel_plane->base))
#define for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) \
list_for_each_entry(intel_plane, \
@@ -440,6 +444,14 @@ enum phy_fia {
(__i)--) \
for_each_if(crtc)
+#define intel_atomic_crtc_state_for_each_plane_state( \
+ plane, plane_state, \
+ crtc_state) \
+ for_each_intel_plane_mask(((crtc_state)->base.state->dev), (plane), \
+ ((crtc_state)->base.plane_mask)) \
+ for_each_if ((plane_state = \
+ to_intel_plane_state(__drm_atomic_get_current_plane_state((crtc_state)->base.state, &plane->base))))
+
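A hypothetical use of the new intel_atomic_crtc_state_for_each_plane_state() iterator above (the function below is only an example, not part of the patch): it walks every plane attached to the given CRTC state and fetches each plane's current state from the same atomic state.

static unsigned int count_visible_planes(const struct intel_crtc_state *crtc_state)
{
	struct intel_plane_state *plane_state;
	struct intel_plane *plane;
	unsigned int count = 0;

	/* Iterates planes in crtc_state->base.plane_mask via the new macro. */
	intel_atomic_crtc_state_for_each_plane_state(plane, plane_state, crtc_state) {
		if (plane_state->base.visible)
			count++;
	}

	return count;
}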
void intel_link_compute_m_n(u16 bpp, int nlanes,
int pixel_clock, int link_clock,
struct intel_link_m_n *m_n,
@@ -453,6 +465,7 @@ enum drm_mode_status
intel_mode_valid_max_plane_size(struct drm_i915_private *dev_priv,
const struct drm_display_mode *mode);
enum phy intel_port_to_phy(struct drm_i915_private *i915, enum port port);
+bool is_trans_port_sync_mode(const struct intel_crtc_state *state);
void intel_plane_destroy(struct drm_plane *plane);
void i830_enable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe);
@@ -531,8 +544,6 @@ void intel_dp_get_m_n(struct intel_crtc *crtc,
struct intel_crtc_state *pipe_config);
void intel_dp_set_m_n(const struct intel_crtc_state *crtc_state,
enum link_m_n_set m_n);
-void intel_dp_ycbcr_420_enable(struct intel_dp *intel_dp,
- const struct intel_crtc_state *crtc_state);
int intel_dotclock_calculate(int link_freq, const struct intel_link_m_n *m_n);
bool bxt_find_best_dpll(struct intel_crtc_state *crtc_state,
struct dpll *best_clock);
diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c
index 0616284c6da6..6f9e7927e248 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power.c
+++ b/drivers/gpu/drm/i915/display/intel_display_power.c
@@ -695,7 +695,11 @@ static u32 gen9_dc_mask(struct drm_i915_private *dev_priv)
u32 mask;
mask = DC_STATE_EN_UPTO_DC5;
- if (INTEL_GEN(dev_priv) >= 11)
+
+ if (INTEL_GEN(dev_priv) >= 12)
+ mask |= DC_STATE_EN_DC3CO | DC_STATE_EN_UPTO_DC6
+ | DC_STATE_EN_DC9;
+ else if (IS_GEN(dev_priv, 11))
mask |= DC_STATE_EN_UPTO_DC6 | DC_STATE_EN_DC9;
else if (IS_GEN9_LP(dev_priv))
mask |= DC_STATE_EN_DC9;
@@ -765,6 +769,52 @@ static void gen9_set_dc_state(struct drm_i915_private *dev_priv, u32 state)
dev_priv->csr.dc_state = val & mask;
}
+static u32
+sanitize_target_dc_state(struct drm_i915_private *dev_priv,
+ u32 target_dc_state)
+{
+ u32 states[] = {
+ DC_STATE_EN_UPTO_DC6,
+ DC_STATE_EN_UPTO_DC5,
+ DC_STATE_EN_DC3CO,
+ DC_STATE_DISABLE,
+ };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(states) - 1; i++) {
+ if (target_dc_state != states[i])
+ continue;
+
+ if (dev_priv->csr.allowed_dc_mask & target_dc_state)
+ break;
+
+ target_dc_state = states[i + 1];
+ }
+
+ return target_dc_state;
+}
+
+static void tgl_enable_dc3co(struct drm_i915_private *dev_priv)
+{
+ DRM_DEBUG_KMS("Enabling DC3CO\n");
+ gen9_set_dc_state(dev_priv, DC_STATE_EN_DC3CO);
+}
+
+static void tgl_disable_dc3co(struct drm_i915_private *dev_priv)
+{
+ u32 val;
+
+ DRM_DEBUG_KMS("Disabling DC3CO\n");
+ val = I915_READ(DC_STATE_EN);
+ val &= ~DC_STATE_DC3CO_STATUS;
+ I915_WRITE(DC_STATE_EN, val);
+ gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
+ /*
+ * Delay of 200us for DC3CO exit time, per Bspec 49196
+ */
+ usleep_range(200, 210);
+}
+
static void bxt_enable_dc9(struct drm_i915_private *dev_priv)
{
assert_can_enable_dc9(dev_priv);
@@ -820,6 +870,51 @@ lookup_power_well(struct drm_i915_private *dev_priv,
return &dev_priv->power_domains.power_wells[0];
}
+/**
+ * intel_display_power_set_target_dc_state - Set target dc state.
+ * @dev_priv: i915 device
+ * @state: state which needs to be set as target_dc_state.
+ *
+ * This function sets the "DC off" power well target_dc_state;
+ * based upon this target_dc_state, the "DC off" power well will
+ * enable the desired DC state.
+ */
+void intel_display_power_set_target_dc_state(struct drm_i915_private *dev_priv,
+ u32 state)
+{
+ struct i915_power_well *power_well;
+ bool dc_off_enabled;
+ struct i915_power_domains *power_domains = &dev_priv->power_domains;
+
+ mutex_lock(&power_domains->lock);
+ power_well = lookup_power_well(dev_priv, SKL_DISP_DC_OFF);
+
+ if (WARN_ON(!power_well))
+ goto unlock;
+
+ state = sanitize_target_dc_state(dev_priv, state);
+
+ if (state == dev_priv->csr.target_dc_state)
+ goto unlock;
+
+ dc_off_enabled = power_well->desc->ops->is_enabled(dev_priv,
+ power_well);
+ /*
+ * If the DC off power well is currently disabled, we need to enable and
+ * then disable it so that the target DC state takes effect.
+ */
+ if (!dc_off_enabled)
+ power_well->desc->ops->enable(dev_priv, power_well);
+
+ dev_priv->csr.target_dc_state = state;
+
+ if (!dc_off_enabled)
+ power_well->desc->ops->disable(dev_priv, power_well);
+
+unlock:
+ mutex_unlock(&power_domains->lock);
+}
+
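A hypothetical caller of the new interface above (not part of this hunk): request DC3CO around a period where it is useful, then restore a default. The request is sanitized against csr.allowed_dc_mask, so on hardware where DC3CO is not allowed it falls back to DC_STATE_DISABLE per the table in sanitize_target_dc_state(); restoring to DC6 here is just an example default.

static void example_toggle_dc3co(struct drm_i915_private *dev_priv, bool enable)
{
	if (enable)
		/* Sanitized to DC_STATE_DISABLE if DC3CO is not allowed. */
		intel_display_power_set_target_dc_state(dev_priv,
							DC_STATE_EN_DC3CO);
	else
		intel_display_power_set_target_dc_state(dev_priv,
							DC_STATE_EN_UPTO_DC6);
}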
static void assert_can_enable_dc5(struct drm_i915_private *dev_priv)
{
bool pg2_enabled = intel_display_power_well_is_enabled(dev_priv,
@@ -932,7 +1027,8 @@ static void bxt_verify_ddi_phy_power_wells(struct drm_i915_private *dev_priv)
static bool gen9_dc_off_power_well_enabled(struct drm_i915_private *dev_priv,
struct i915_power_well *power_well)
{
- return (I915_READ(DC_STATE_EN) & DC_STATE_EN_UPTO_DC5_DC6_MASK) == 0;
+ return ((I915_READ(DC_STATE_EN) & DC_STATE_EN_DC3CO) == 0 &&
+ (I915_READ(DC_STATE_EN) & DC_STATE_EN_UPTO_DC5_DC6_MASK) == 0);
}
static void gen9_assert_dbuf_enabled(struct drm_i915_private *dev_priv)
@@ -948,6 +1044,11 @@ static void gen9_disable_dc_states(struct drm_i915_private *dev_priv)
{
struct intel_cdclk_state cdclk_state = {};
+ if (dev_priv->csr.target_dc_state == DC_STATE_EN_DC3CO) {
+ tgl_disable_dc3co(dev_priv);
+ return;
+ }
+
gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
dev_priv->display.get_cdclk(dev_priv, &cdclk_state);
@@ -980,10 +1081,17 @@ static void gen9_dc_off_power_well_disable(struct drm_i915_private *dev_priv,
if (!dev_priv->csr.dmc_payload)
return;
- if (dev_priv->csr.allowed_dc_mask & DC_STATE_EN_UPTO_DC6)
+ switch (dev_priv->csr.target_dc_state) {
+ case DC_STATE_EN_DC3CO:
+ tgl_enable_dc3co(dev_priv);
+ break;
+ case DC_STATE_EN_UPTO_DC6:
skl_enable_dc6(dev_priv);
- else if (dev_priv->csr.allowed_dc_mask & DC_STATE_EN_UPTO_DC5)
+ break;
+ case DC_STATE_EN_UPTO_DC5:
gen9_enable_dc5(dev_priv);
+ break;
+ }
}
static void i9xx_power_well_sync_hw_noop(struct drm_i915_private *dev_priv,
@@ -2931,7 +3039,7 @@ static const struct i915_power_well_desc skl_power_wells[] = {
.name = "DC off",
.domains = SKL_DISPLAY_DC_OFF_POWER_DOMAINS,
.ops = &gen9_dc_off_power_well_ops,
- .id = DISP_PW_ID_NONE,
+ .id = SKL_DISP_DC_OFF,
},
{
.name = "power well 2",
@@ -3013,7 +3121,7 @@ static const struct i915_power_well_desc bxt_power_wells[] = {
.name = "DC off",
.domains = BXT_DISPLAY_DC_OFF_POWER_DOMAINS,
.ops = &gen9_dc_off_power_well_ops,
- .id = DISP_PW_ID_NONE,
+ .id = SKL_DISP_DC_OFF,
},
{
.name = "power well 2",
@@ -3073,7 +3181,7 @@ static const struct i915_power_well_desc glk_power_wells[] = {
.name = "DC off",
.domains = GLK_DISPLAY_DC_OFF_POWER_DOMAINS,
.ops = &gen9_dc_off_power_well_ops,
- .id = DISP_PW_ID_NONE,
+ .id = SKL_DISP_DC_OFF,
},
{
.name = "power well 2",
@@ -3242,7 +3350,7 @@ static const struct i915_power_well_desc cnl_power_wells[] = {
.name = "DC off",
.domains = CNL_DISPLAY_DC_OFF_POWER_DOMAINS,
.ops = &gen9_dc_off_power_well_ops,
- .id = DISP_PW_ID_NONE,
+ .id = SKL_DISP_DC_OFF,
},
{
.name = "power well 2",
@@ -3370,7 +3478,7 @@ static const struct i915_power_well_desc icl_power_wells[] = {
.name = "DC off",
.domains = ICL_DISPLAY_DC_OFF_POWER_DOMAINS,
.ops = &gen9_dc_off_power_well_ops,
- .id = DISP_PW_ID_NONE,
+ .id = SKL_DISP_DC_OFF,
},
{
.name = "power well 2",
@@ -3603,7 +3711,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
.name = "DC off",
.domains = TGL_DISPLAY_DC_OFF_POWER_DOMAINS,
.ops = &gen9_dc_off_power_well_ops,
- .id = DISP_PW_ID_NONE,
+ .id = SKL_DISP_DC_OFF,
},
{
.name = "power well 2",
@@ -3924,14 +4032,17 @@ static u32 get_allowed_dc_mask(const struct drm_i915_private *dev_priv,
int requested_dc;
int max_dc;
- if (INTEL_GEN(dev_priv) >= 11) {
- max_dc = 2;
+ if (INTEL_GEN(dev_priv) >= 12) {
+ max_dc = 4;
/*
* DC9 has a separate HW flow from the rest of the DC states,
* not depending on the DMC firmware. It's needed by system
* suspend/resume, so allow it unconditionally.
*/
mask = DC_STATE_EN_DC9;
+ } else if (IS_GEN(dev_priv, 11)) {
+ max_dc = 2;
+ mask = DC_STATE_EN_DC9;
} else if (IS_GEN(dev_priv, 10) || IS_GEN9_BC(dev_priv)) {
max_dc = 2;
mask = 0;
@@ -3950,7 +4061,7 @@ static u32 get_allowed_dc_mask(const struct drm_i915_private *dev_priv,
requested_dc = enable_dc;
} else if (enable_dc == -1) {
requested_dc = max_dc;
- } else if (enable_dc > max_dc && enable_dc <= 2) {
+ } else if (enable_dc > max_dc && enable_dc <= 4) {
DRM_DEBUG_KMS("Adjusting requested max DC state (%d->%d)\n",
enable_dc, max_dc);
requested_dc = max_dc;
@@ -3959,10 +4070,20 @@ static u32 get_allowed_dc_mask(const struct drm_i915_private *dev_priv,
requested_dc = max_dc;
}
- if (requested_dc > 1)
+ switch (requested_dc) {
+ case 4:
+ mask |= DC_STATE_EN_DC3CO | DC_STATE_EN_UPTO_DC6;
+ break;
+ case 3:
+ mask |= DC_STATE_EN_DC3CO | DC_STATE_EN_UPTO_DC5;
+ break;
+ case 2:
mask |= DC_STATE_EN_UPTO_DC6;
- if (requested_dc > 0)
+ break;
+ case 1:
mask |= DC_STATE_EN_UPTO_DC5;
+ break;
+ }
DRM_DEBUG_KMS("Allowed DC state mask %02x\n", mask);
@@ -4023,6 +4144,9 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv)
dev_priv->csr.allowed_dc_mask =
get_allowed_dc_mask(dev_priv, i915_modparams.enable_dc);
+ dev_priv->csr.target_dc_state =
+ sanitize_target_dc_state(dev_priv, DC_STATE_EN_UPTO_DC6);
+
BUILD_BUG_ON(POWER_DOMAIN_NUM > 64);
mutex_init(&power_domains->lock);
diff --git a/drivers/gpu/drm/i915/display/intel_display_power.h b/drivers/gpu/drm/i915/display/intel_display_power.h
index 737b5def7fc6..1da04f3e0fb3 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power.h
+++ b/drivers/gpu/drm/i915/display/intel_display_power.h
@@ -100,6 +100,7 @@ enum i915_power_well_id {
SKL_DISP_PW_MISC_IO,
SKL_DISP_PW_1,
SKL_DISP_PW_2,
+ SKL_DISP_DC_OFF,
};
#define POWER_DOMAIN_PIPE(pipe) ((pipe) + POWER_DOMAIN_PIPE_A)
@@ -256,6 +257,8 @@ void intel_display_power_suspend_late(struct drm_i915_private *i915);
void intel_display_power_resume_early(struct drm_i915_private *i915);
void intel_display_power_suspend(struct drm_i915_private *i915);
void intel_display_power_resume(struct drm_i915_private *i915);
+void intel_display_power_set_target_dc_state(struct drm_i915_private *dev_priv,
+ u32 state);
const char *
intel_display_power_domain_str(enum intel_display_power_domain domain);
diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
index 1602aac7ca0f..8358152e403e 100644
--- a/drivers/gpu/drm/i915/display/intel_display_types.h
+++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -870,6 +870,7 @@ struct intel_crtc_state {
bool has_psr;
bool has_psr2;
+ u32 dc3co_exitline;
/*
* Frequence the dpll for the port should run at. Differs from the
@@ -990,6 +991,12 @@ struct intel_crtc_state {
/* Forward Error correction State */
bool fec_enable;
+
+ /* Pointer to master transcoder in case of tiled displays */
+ enum transcoder master_transcoder;
+
+ /* Bitmask to indicate slaves attached */
+ u8 sync_mode_slaves_mask;
};
struct intel_crtc {
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 0e45c61d7331..5eeafa45831a 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -591,6 +591,25 @@ static u8 intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp,
return 0;
}
+static bool intel_dp_hdisplay_bad(struct drm_i915_private *dev_priv,
+ int hdisplay)
+{
+ /*
+ * Older platforms don't like hdisplay==4096 with DP.
+ *
+ * On ILK/SNB/IVB the pipe seems to be somewhat running (scanline
+ * and frame counter increment), but we don't get vblank interrupts,
+ * and the pipe underruns immediately. The link also doesn't seem
+ * to get trained properly.
+ *
+ * On CHV the vblank interrupts don't seem to disappear but
+ * otherwise the symptoms are similar.
+ *
+ * TODO: confirm the behaviour on HSW+
+ */
+ return hdisplay == 4096 && !HAS_DDI(dev_priv);
+}
+
static enum drm_mode_status
intel_dp_mode_valid(struct drm_connector *connector,
struct drm_display_mode *mode)
@@ -626,6 +645,9 @@ intel_dp_mode_valid(struct drm_connector *connector,
max_rate = intel_dp_max_data_rate(max_link_clock, max_lanes);
mode_rate = intel_dp_link_required(target_clock, 18);
+ if (intel_dp_hdisplay_bad(dev_priv, mode->hdisplay))
+ return MODE_H_ILLEGAL;
+
/*
* Output bpp is stored in 6.4 format so right shift by 4 to get the
* integer value since we support only integer values of bpp.
@@ -1644,6 +1666,7 @@ static i915_reg_t skl_aux_ctl_reg(struct intel_dp *intel_dp)
case AUX_CH_D:
case AUX_CH_E:
case AUX_CH_F:
+ case AUX_CH_G:
return DP_AUX_CH_CTL(aux_ch);
default:
MISSING_CASE(aux_ch);
@@ -1664,6 +1687,7 @@ static i915_reg_t skl_aux_data_reg(struct intel_dp *intel_dp, int index)
case AUX_CH_D:
case AUX_CH_E:
case AUX_CH_F:
+ case AUX_CH_G:
return DP_AUX_CH_DATA(aux_ch, index);
default:
MISSING_CASE(aux_ch);
@@ -2297,6 +2321,7 @@ intel_dp_compute_config(struct intel_encoder *encoder,
pipe_config->has_pch_encoder = true;
pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB;
+
if (lspcon->active)
lspcon_ycbcr420_config(&intel_connector->base, pipe_config);
else
@@ -2342,6 +2367,9 @@ intel_dp_compute_config(struct intel_encoder *encoder,
if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK)
return -EINVAL;
+ if (intel_dp_hdisplay_bad(dev_priv, adjusted_mode->crtc_hdisplay))
+ return -EINVAL;
+
ret = intel_dp_compute_link_config(encoder, pipe_config, conn_state);
if (ret < 0)
return ret;
@@ -4450,7 +4478,7 @@ intel_dp_configure_mst(struct intel_dp *intel_dp)
&dp_to_dig_port(intel_dp)->base;
bool sink_can_mst = intel_dp_sink_can_mst(intel_dp);
- DRM_DEBUG_KMS("[ENCODER:%d:%s] MST support? port: %s, sink: %s, modparam: %s\n",
+ DRM_DEBUG_KMS("[ENCODER:%d:%s] MST support: port: %s, sink: %s, modparam: %s\n",
encoder->base.base.id, encoder->base.name,
yesno(intel_dp->can_mst), yesno(sink_can_mst),
yesno(i915_modparams.enable_dp_mst));
@@ -4473,9 +4501,36 @@ intel_dp_get_sink_irq_esi(struct intel_dp *intel_dp, u8 *sink_irq_vector)
DP_DPRX_ESI_LEN;
}
+bool
+intel_dp_needs_vsc_sdp(const struct intel_crtc_state *crtc_state,
+ const struct drm_connector_state *conn_state)
+{
+ /*
+ * As per DP 1.4a spec section 2.2.4.3 [MSA Field for Indication
+ * of Color Encoding Format and Content Color Gamut], in order to
+ * send YCBCR 420 or HDR BT.2020 signals we should use a DP VSC SDP.
+ */
+ if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420)
+ return true;
+
+ switch (conn_state->colorspace) {
+ case DRM_MODE_COLORIMETRY_SYCC_601:
+ case DRM_MODE_COLORIMETRY_OPYCC_601:
+ case DRM_MODE_COLORIMETRY_BT2020_YCC:
+ case DRM_MODE_COLORIMETRY_BT2020_RGB:
+ case DRM_MODE_COLORIMETRY_BT2020_CYCC:
+ return true;
+ default:
+ break;
+ }
+
+ return false;
+}
+
static void
-intel_pixel_encoding_setup_vsc(struct intel_dp *intel_dp,
- const struct intel_crtc_state *crtc_state)
+intel_dp_setup_vsc_sdp(struct intel_dp *intel_dp,
+ const struct intel_crtc_state *crtc_state,
+ const struct drm_connector_state *conn_state)
{
struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
struct dp_sdp vsc_sdp = {};
@@ -4496,13 +4551,55 @@ intel_pixel_encoding_setup_vsc(struct intel_dp *intel_dp,
*/
vsc_sdp.sdp_header.HB3 = 0x13;
- /*
- * YCbCr 420 = 3h DB16[7:4] ITU-R BT.601 = 0h, ITU-R BT.709 = 1h
- * DB16[3:0] DP 1.4a spec, Table 2-120
- */
- vsc_sdp.db[16] = 0x3 << 4; /* 0x3 << 4 , YCbCr 420*/
- /* RGB->YCBCR color conversion uses the BT.709 color space. */
- vsc_sdp.db[16] |= 0x1; /* 0x1, ITU-R BT.709 */
+ /* DP 1.4a spec, Table 2-120 */
+ switch (crtc_state->output_format) {
+ case INTEL_OUTPUT_FORMAT_YCBCR444:
+ vsc_sdp.db[16] = 0x1 << 4; /* YCbCr 444 : DB16[7:4] = 1h */
+ break;
+ case INTEL_OUTPUT_FORMAT_YCBCR420:
+ vsc_sdp.db[16] = 0x3 << 4; /* YCbCr 420 : DB16[7:4] = 3h */
+ break;
+ case INTEL_OUTPUT_FORMAT_RGB:
+ default:
+ /* RGB: DB16[7:4] = 0h */
+ break;
+ }
+
+ switch (conn_state->colorspace) {
+ case DRM_MODE_COLORIMETRY_BT709_YCC:
+ vsc_sdp.db[16] |= 0x1;
+ break;
+ case DRM_MODE_COLORIMETRY_XVYCC_601:
+ vsc_sdp.db[16] |= 0x2;
+ break;
+ case DRM_MODE_COLORIMETRY_XVYCC_709:
+ vsc_sdp.db[16] |= 0x3;
+ break;
+ case DRM_MODE_COLORIMETRY_SYCC_601:
+ vsc_sdp.db[16] |= 0x4;
+ break;
+ case DRM_MODE_COLORIMETRY_OPYCC_601:
+ vsc_sdp.db[16] |= 0x5;
+ break;
+ case DRM_MODE_COLORIMETRY_BT2020_CYCC:
+ case DRM_MODE_COLORIMETRY_BT2020_RGB:
+ vsc_sdp.db[16] |= 0x6;
+ break;
+ case DRM_MODE_COLORIMETRY_BT2020_YCC:
+ vsc_sdp.db[16] |= 0x7;
+ break;
+ case DRM_MODE_COLORIMETRY_DCI_P3_RGB_D65:
+ case DRM_MODE_COLORIMETRY_DCI_P3_RGB_THEATER:
+ vsc_sdp.db[16] |= 0x4; /* DCI-P3 (SMPTE RP 431-2) */
+ break;
+ default:
+ /* sRGB (IEC 61966-2-1) / ITU-R BT.601: DB16[0:3] = 0h */
+
+ /* RGB->YCBCR color conversion uses the BT.709 color space. */
+ if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420)
+ vsc_sdp.db[16] |= 0x1; /* 0x1, ITU-R BT.709 */
+ break;
+ }
/*
* For pixel encoding formats YCbCr444, YCbCr422, YCbCr420, and Y Only,
@@ -4554,13 +4651,106 @@ intel_pixel_encoding_setup_vsc(struct intel_dp *intel_dp,
crtc_state, DP_SDP_VSC, &vsc_sdp, sizeof(vsc_sdp));
}
-void intel_dp_ycbcr_420_enable(struct intel_dp *intel_dp,
- const struct intel_crtc_state *crtc_state)
+static void
+intel_dp_setup_hdr_metadata_infoframe_sdp(struct intel_dp *intel_dp,
+ const struct intel_crtc_state *crtc_state,
+ const struct drm_connector_state *conn_state)
{
- if (crtc_state->output_format != INTEL_OUTPUT_FORMAT_YCBCR420)
+ struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
+ struct dp_sdp infoframe_sdp = {};
+ struct hdmi_drm_infoframe drm_infoframe = {};
+ const int infoframe_size = HDMI_INFOFRAME_HEADER_SIZE + HDMI_DRM_INFOFRAME_SIZE;
+ unsigned char buf[HDMI_INFOFRAME_HEADER_SIZE + HDMI_DRM_INFOFRAME_SIZE];
+ ssize_t len;
+ int ret;
+
+ ret = drm_hdmi_infoframe_set_hdr_metadata(&drm_infoframe, conn_state);
+ if (ret) {
+ DRM_DEBUG_KMS("couldn't set HDR metadata in infoframe\n");
+ return;
+ }
+
+ len = hdmi_drm_infoframe_pack_only(&drm_infoframe, buf, sizeof(buf));
+ if (len < 0) {
+ DRM_DEBUG_KMS("buffer size is smaller than hdr metadata infoframe\n");
+ return;
+ }
+
+ if (len != infoframe_size) {
+ DRM_DEBUG_KMS("wrong static hdr metadata size\n");
return;
+ }
- intel_pixel_encoding_setup_vsc(intel_dp, crtc_state);
+ /*
+ * Set up the infoframe sdp packet for HDR static metadata.
+ * Prepare VSC Header for SU as per DP 1.4a spec,
+ * Table 2-100 and Table 2-101
+ */
+
+ /* Packet ID, 00h for non-Audio INFOFRAME */
+ infoframe_sdp.sdp_header.HB0 = 0;
+ /*
+ * Packet Type 80h + Non-audio INFOFRAME Type value
+ * HDMI_INFOFRAME_TYPE_DRM: 0x87,
+ */
+ infoframe_sdp.sdp_header.HB1 = drm_infoframe.type;
+ /*
+ * Least Significant Eight Bits of (Data Byte Count – 1)
+ * infoframe_size - 1,
+ */
+ infoframe_sdp.sdp_header.HB2 = 0x1D;
+ /* INFOFRAME SDP Version Number */
+ infoframe_sdp.sdp_header.HB3 = (0x13 << 2);
+ /* CTA Header Byte 2 (INFOFRAME Version Number) */
+ infoframe_sdp.db[0] = drm_infoframe.version;
+ /* CTA Header Byte 3 (Length of INFOFRAME): HDMI_DRM_INFOFRAME_SIZE */
+ infoframe_sdp.db[1] = drm_infoframe.length;
+ /*
+ * Copy HDMI_DRM_INFOFRAME_SIZE size from a buffer after
+ * HDMI_INFOFRAME_HEADER_SIZE
+ */
+ BUILD_BUG_ON(sizeof(infoframe_sdp.db) < HDMI_DRM_INFOFRAME_SIZE + 2);
+ memcpy(&infoframe_sdp.db[2], &buf[HDMI_INFOFRAME_HEADER_SIZE],
+ HDMI_DRM_INFOFRAME_SIZE);
+
+ /*
+ * The size of the DP infoframe sdp packet for HDR static metadata consists of
+ * - DP SDP Header(struct dp_sdp_header): 4 bytes
+ * - Two Data Blocks: 2 bytes
+ * CTA Header Byte2 (INFOFRAME Version Number)
+ * CTA Header Byte3 (Length of INFOFRAME)
+ * - HDMI_DRM_INFOFRAME_SIZE: 26 bytes
+ *
+ * Prior to GEN11, the GMP register size is identical to the DP HDR static
+ * metadata infoframe size. GEN11+ is larger than that, and write_infoframe
+ * will pad the rest of the size.
+ */
+ intel_dig_port->write_infoframe(&intel_dig_port->base, crtc_state,
+ HDMI_PACKET_TYPE_GAMUT_METADATA,
+ &infoframe_sdp,
+ sizeof(struct dp_sdp_header) + 2 + HDMI_DRM_INFOFRAME_SIZE);
+}
+
+void intel_dp_vsc_enable(struct intel_dp *intel_dp,
+ const struct intel_crtc_state *crtc_state,
+ const struct drm_connector_state *conn_state)
+{
+ if (!intel_dp_needs_vsc_sdp(crtc_state, conn_state))
+ return;
+
+ intel_dp_setup_vsc_sdp(intel_dp, crtc_state, conn_state);
+}
+
+void intel_dp_hdr_metadata_enable(struct intel_dp *intel_dp,
+ const struct intel_crtc_state *crtc_state,
+ const struct drm_connector_state *conn_state)
+{
+ if (!conn_state->hdr_output_metadata)
+ return;
+
+ intel_dp_setup_hdr_metadata_infoframe_sdp(intel_dp,
+ crtc_state,
+ conn_state);
}
static u8 intel_dp_autotest_link_training(struct intel_dp *intel_dp)
@@ -5282,6 +5472,9 @@ static bool icl_combo_port_connected(struct drm_i915_private *dev_priv,
{
enum port port = intel_dig_port->base.port;
+ if (HAS_PCH_MCC(dev_priv) && port == PORT_C)
+ return I915_READ(SDEISR) & SDE_TC_HOTPLUG_ICP(PORT_TC1);
+
return I915_READ(SDEISR) & SDE_DDI_HOTPLUG_ICP(port);
}
@@ -6402,6 +6595,13 @@ intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *connect
else if (INTEL_GEN(dev_priv) >= 5)
drm_connector_attach_max_bpc_property(connector, 6, 12);
+ intel_attach_colorspace_property(connector);
+
+ if (IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) >= 11)
+ drm_object_attach_property(&connector->base,
+ connector->dev->mode_config.hdr_output_metadata_property,
+ 0);
+
if (intel_dp_is_edp(intel_dp)) {
u32 allowed_scalers;
@@ -7268,11 +7468,8 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port,
intel_connector->get_hw_state = intel_connector_get_hw_state;
/* init MST on ports that can support it */
- if (HAS_DP_MST(dev_priv) && !intel_dp_is_edp(intel_dp) &&
- (port == PORT_B || port == PORT_C ||
- port == PORT_D || port == PORT_F))
- intel_dp_mst_encoder_init(intel_dig_port,
- intel_connector->base.base.id);
+ intel_dp_mst_encoder_init(intel_dig_port,
+ intel_connector->base.base.id);
if (!intel_edp_init_connector(intel_dp, intel_connector)) {
intel_dp_aux_fini(intel_dp);
diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h
index a194b5b6da05..3da166054788 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.h
+++ b/drivers/gpu/drm/i915/display/intel_dp.h
@@ -108,6 +108,14 @@ bool intel_dp_read_dpcd(struct intel_dp *intel_dp);
bool intel_dp_get_colorimetry_status(struct intel_dp *intel_dp);
int intel_dp_link_required(int pixel_clock, int bpp);
int intel_dp_max_data_rate(int max_link_clock, int max_lanes);
+bool intel_dp_needs_vsc_sdp(const struct intel_crtc_state *crtc_state,
+ const struct drm_connector_state *conn_state);
+void intel_dp_vsc_enable(struct intel_dp *intel_dp,
+ const struct intel_crtc_state *crtc_state,
+ const struct drm_connector_state *conn_state);
+void intel_dp_hdr_metadata_enable(struct intel_dp *intel_dp,
+ const struct intel_crtc_state *crtc_state,
+ const struct drm_connector_state *conn_state);
bool intel_digital_port_connected(struct intel_encoder *encoder);
static inline unsigned int intel_dp_unused_lane_mask(int lane_count)
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 2203be28ea01..bbcab27644dc 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -655,21 +655,31 @@ intel_dp_mst_encoder_active_links(struct intel_digital_port *intel_dig_port)
int
intel_dp_mst_encoder_init(struct intel_digital_port *intel_dig_port, int conn_base_id)
{
+ struct drm_i915_private *i915 = to_i915(intel_dig_port->base.base.dev);
struct intel_dp *intel_dp = &intel_dig_port->dp;
- struct drm_device *dev = intel_dig_port->base.base.dev;
+ enum port port = intel_dig_port->base.port;
int ret;
- intel_dp->can_mst = true;
+ if (!HAS_DP_MST(i915) || intel_dp_is_edp(intel_dp))
+ return 0;
+
+ if (INTEL_GEN(i915) < 12 && port == PORT_A)
+ return 0;
+
+ if (INTEL_GEN(i915) < 11 && port == PORT_E)
+ return 0;
+
intel_dp->mst_mgr.cbs = &mst_cbs;
/* create encoders */
intel_dp_create_fake_mst_encoders(intel_dig_port);
- ret = drm_dp_mst_topology_mgr_init(&intel_dp->mst_mgr, dev,
+ ret = drm_dp_mst_topology_mgr_init(&intel_dp->mst_mgr, &i915->drm,
&intel_dp->aux, 16, 3, conn_base_id);
- if (ret) {
- intel_dp->can_mst = false;
+ if (ret)
return ret;
- }
+
+ intel_dp->can_mst = true;
+
return 0;
}
diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
index 5e9e84c94a15..ec10fa7d3c69 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
@@ -247,8 +247,7 @@ static struct intel_shared_dpll *
intel_find_shared_dpll(struct intel_atomic_state *state,
const struct intel_crtc *crtc,
const struct intel_dpll_hw_state *pll_state,
- enum intel_dpll_id range_min,
- enum intel_dpll_id range_max)
+ unsigned long dpll_mask)
{
struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
struct intel_shared_dpll *pll, *unused_pll = NULL;
@@ -257,7 +256,9 @@ intel_find_shared_dpll(struct intel_atomic_state *state,
shared_dpll = intel_atomic_get_shared_dpll_state(&state->base);
- for (i = range_min; i <= range_max; i++) {
+ WARN_ON(dpll_mask & ~(BIT(I915_NUM_PLLS) - 1));
+
+ for_each_set_bit(i, &dpll_mask, I915_NUM_PLLS) {
pll = &dev_priv->shared_dplls[i];
/* Only want to check enabled timings first */
@@ -464,8 +465,8 @@ static bool ibx_get_dpll(struct intel_atomic_state *state,
} else {
pll = intel_find_shared_dpll(state, crtc,
&crtc_state->dpll_hw_state,
- DPLL_ID_PCH_PLL_A,
- DPLL_ID_PCH_PLL_B);
+ BIT(DPLL_ID_PCH_PLL_B) |
+ BIT(DPLL_ID_PCH_PLL_A));
}
if (!pll)
@@ -814,7 +815,8 @@ hsw_ddi_hdmi_get_dpll(struct intel_atomic_state *state,
pll = intel_find_shared_dpll(state, crtc,
&crtc_state->dpll_hw_state,
- DPLL_ID_WRPLL1, DPLL_ID_WRPLL2);
+ BIT(DPLL_ID_WRPLL2) |
+ BIT(DPLL_ID_WRPLL1));
if (!pll)
return NULL;
@@ -877,7 +879,7 @@ static bool hsw_get_dpll(struct intel_atomic_state *state,
pll = intel_find_shared_dpll(state, crtc,
&crtc_state->dpll_hw_state,
- DPLL_ID_SPLL, DPLL_ID_SPLL);
+ BIT(DPLL_ID_SPLL));
} else {
return false;
}
@@ -1447,13 +1449,13 @@ static bool skl_get_dpll(struct intel_atomic_state *state,
if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP))
pll = intel_find_shared_dpll(state, crtc,
&crtc_state->dpll_hw_state,
- DPLL_ID_SKL_DPLL0,
- DPLL_ID_SKL_DPLL0);
+ BIT(DPLL_ID_SKL_DPLL0));
else
pll = intel_find_shared_dpll(state, crtc,
&crtc_state->dpll_hw_state,
- DPLL_ID_SKL_DPLL1,
- DPLL_ID_SKL_DPLL3);
+ BIT(DPLL_ID_SKL_DPLL3) |
+ BIT(DPLL_ID_SKL_DPLL2) |
+ BIT(DPLL_ID_SKL_DPLL1));
if (!pll)
return false;
@@ -2401,8 +2403,9 @@ static bool cnl_get_dpll(struct intel_atomic_state *state,
pll = intel_find_shared_dpll(state, crtc,
&crtc_state->dpll_hw_state,
- DPLL_ID_SKL_DPLL0,
- DPLL_ID_SKL_DPLL2);
+ BIT(DPLL_ID_SKL_DPLL2) |
+ BIT(DPLL_ID_SKL_DPLL1) |
+ BIT(DPLL_ID_SKL_DPLL0));
if (!pll) {
DRM_DEBUG_KMS("No PLL selected\n");
return false;
@@ -2975,7 +2978,7 @@ static bool icl_get_combo_phy_dpll(struct intel_atomic_state *state,
&crtc_state->icl_port_dplls[ICL_PORT_DPLL_DEFAULT];
struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
enum port port = encoder->port;
- bool has_dpll4 = false;
+ unsigned long dpll_mask;
if (!icl_calc_dpll_state(crtc_state, encoder, &port_dpll->hw_state)) {
DRM_DEBUG_KMS("Could not calculate combo PHY PLL state.\n");
@@ -2984,13 +2987,16 @@ static bool icl_get_combo_phy_dpll(struct intel_atomic_state *state,
}
if (IS_ELKHARTLAKE(dev_priv) && port != PORT_A)
- has_dpll4 = true;
+ dpll_mask =
+ BIT(DPLL_ID_EHL_DPLL4) |
+ BIT(DPLL_ID_ICL_DPLL1) |
+ BIT(DPLL_ID_ICL_DPLL0);
+ else
+ dpll_mask = BIT(DPLL_ID_ICL_DPLL1) | BIT(DPLL_ID_ICL_DPLL0);
port_dpll->pll = intel_find_shared_dpll(state, crtc,
&port_dpll->hw_state,
- DPLL_ID_ICL_DPLL0,
- has_dpll4 ? DPLL_ID_EHL_DPLL4
- : DPLL_ID_ICL_DPLL1);
+ dpll_mask);
if (!port_dpll->pll) {
DRM_DEBUG_KMS("No combo PHY PLL found for [ENCODER:%d:%s]\n",
encoder->base.base.id, encoder->base.name);
@@ -3023,8 +3029,7 @@ static bool icl_get_tc_phy_dplls(struct intel_atomic_state *state,
port_dpll->pll = intel_find_shared_dpll(state, crtc,
&port_dpll->hw_state,
- DPLL_ID_ICL_TBTPLL,
- DPLL_ID_ICL_TBTPLL);
+ BIT(DPLL_ID_ICL_TBTPLL));
if (!port_dpll->pll) {
DRM_DEBUG_KMS("No TBT-ALT PLL found\n");
return false;
@@ -3043,8 +3048,7 @@ static bool icl_get_tc_phy_dplls(struct intel_atomic_state *state,
encoder->port));
port_dpll->pll = intel_find_shared_dpll(state, crtc,
&port_dpll->hw_state,
- dpll_id,
- dpll_id);
+ BIT(dpll_id));
if (!port_dpll->pll) {
DRM_DEBUG_KMS("No MG PHY PLL found\n");
goto err_unreference_tbt_pll;
diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c
index 0a6846c5ba95..b54ccbb5aad5 100644
--- a/drivers/gpu/drm/i915/display/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/display/intel_hdmi.c
@@ -189,13 +189,19 @@ hsw_dip_data_reg(struct drm_i915_private *dev_priv,
}
}
-static int hsw_dip_data_size(unsigned int type)
+static int hsw_dip_data_size(struct drm_i915_private *dev_priv,
+ unsigned int type)
{
switch (type) {
case DP_SDP_VSC:
return VIDEO_DIP_VSC_DATA_SIZE;
case DP_SDP_PPS:
return VIDEO_DIP_PPS_DATA_SIZE;
+ case HDMI_PACKET_TYPE_GAMUT_METADATA:
+ if (INTEL_GEN(dev_priv) >= 11)
+ return VIDEO_DIP_GMP_DATA_SIZE;
+ else
+ return VIDEO_DIP_DATA_SIZE;
default:
return VIDEO_DIP_DATA_SIZE;
}
@@ -514,7 +520,9 @@ static void hsw_write_infoframe(struct intel_encoder *encoder,
int i;
u32 val = I915_READ(ctl_reg);
- data_size = hsw_dip_data_size(type);
+ data_size = hsw_dip_data_size(dev_priv, type);
+
+ WARN_ON(len > data_size);
val &= ~hsw_infoframe_enable(type);
I915_WRITE(ctl_reg, val);
@@ -1527,27 +1535,17 @@ bool intel_hdmi_hdcp_check_link(struct intel_digital_port *intel_dig_port)
return true;
}
-struct hdcp2_hdmi_msg_data {
+struct hdcp2_hdmi_msg_timeout {
u8 msg_id;
- u32 timeout;
- u32 timeout2;
+ u16 timeout;
};
-static const struct hdcp2_hdmi_msg_data hdcp2_msg_data[] = {
- { HDCP_2_2_AKE_INIT, 0, 0 },
- { HDCP_2_2_AKE_SEND_CERT, HDCP_2_2_CERT_TIMEOUT_MS, 0 },
- { HDCP_2_2_AKE_NO_STORED_KM, 0, 0 },
- { HDCP_2_2_AKE_STORED_KM, 0, 0 },
- { HDCP_2_2_AKE_SEND_HPRIME, HDCP_2_2_HPRIME_PAIRED_TIMEOUT_MS,
- HDCP_2_2_HPRIME_NO_PAIRED_TIMEOUT_MS },
- { HDCP_2_2_AKE_SEND_PAIRING_INFO, HDCP_2_2_PAIRING_TIMEOUT_MS, 0 },
- { HDCP_2_2_LC_INIT, 0, 0 },
- { HDCP_2_2_LC_SEND_LPRIME, HDCP_2_2_HDMI_LPRIME_TIMEOUT_MS, 0 },
- { HDCP_2_2_SKE_SEND_EKS, 0, 0 },
- { HDCP_2_2_REP_SEND_RECVID_LIST, HDCP_2_2_RECVID_LIST_TIMEOUT_MS, 0 },
- { HDCP_2_2_REP_SEND_ACK, 0, 0 },
- { HDCP_2_2_REP_STREAM_MANAGE, 0, 0 },
- { HDCP_2_2_REP_STREAM_READY, HDCP_2_2_STREAM_READY_TIMEOUT_MS, 0 },
+static const struct hdcp2_hdmi_msg_timeout hdcp2_msg_timeout[] = {
+ { HDCP_2_2_AKE_SEND_CERT, HDCP_2_2_CERT_TIMEOUT_MS, },
+ { HDCP_2_2_AKE_SEND_PAIRING_INFO, HDCP_2_2_PAIRING_TIMEOUT_MS, },
+ { HDCP_2_2_LC_SEND_LPRIME, HDCP_2_2_HDMI_LPRIME_TIMEOUT_MS, },
+ { HDCP_2_2_REP_SEND_RECVID_LIST, HDCP_2_2_RECVID_LIST_TIMEOUT_MS, },
+ { HDCP_2_2_REP_STREAM_READY, HDCP_2_2_STREAM_READY_TIMEOUT_MS, },
};
static
@@ -1564,12 +1562,17 @@ static int get_hdcp2_msg_timeout(u8 msg_id, bool is_paired)
{
int i;
- for (i = 0; i < ARRAY_SIZE(hdcp2_msg_data); i++)
- if (hdcp2_msg_data[i].msg_id == msg_id &&
- (msg_id != HDCP_2_2_AKE_SEND_HPRIME || is_paired))
- return hdcp2_msg_data[i].timeout;
- else if (hdcp2_msg_data[i].msg_id == msg_id)
- return hdcp2_msg_data[i].timeout2;
+ if (msg_id == HDCP_2_2_AKE_SEND_HPRIME) {
+ if (is_paired)
+ return HDCP_2_2_HPRIME_PAIRED_TIMEOUT_MS;
+ else
+ return HDCP_2_2_HPRIME_NO_PAIRED_TIMEOUT_MS;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(hdcp2_msg_timeout); i++) {
+ if (hdcp2_msg_timeout[i].msg_id == msg_id)
+ return hdcp2_msg_timeout[i].timeout;
+ }
return -EINVAL;
}
diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index b3c7eef53bf3..50f22abcd30e 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -534,6 +534,73 @@ transcoder_has_psr2(struct drm_i915_private *dev_priv, enum transcoder trans)
return trans == TRANSCODER_EDP;
}
+static u32 intel_get_frame_time_us(const struct intel_crtc_state *cstate)
+{
+ if (!cstate || !cstate->base.active)
+ return 0;
+
+ return DIV_ROUND_UP(1000 * 1000,
+ drm_mode_vrefresh(&cstate->base.adjusted_mode));
+}
+
+static void psr2_program_idle_frames(struct drm_i915_private *dev_priv,
+ u32 idle_frames)
+{
+ u32 val;
+
+ idle_frames <<= EDP_PSR2_IDLE_FRAME_SHIFT;
+ val = I915_READ(EDP_PSR2_CTL(dev_priv->psr.transcoder));
+ val &= ~EDP_PSR2_IDLE_FRAME_MASK;
+ val |= idle_frames;
+ I915_WRITE(EDP_PSR2_CTL(dev_priv->psr.transcoder), val);
+}
+
+static void tgl_psr2_enable_dc3co(struct drm_i915_private *dev_priv)
+{
+ psr2_program_idle_frames(dev_priv, 0);
+ intel_display_power_set_target_dc_state(dev_priv, DC_STATE_EN_DC3CO);
+}
+
+static void tgl_psr2_disable_dc3co(struct drm_i915_private *dev_priv)
+{
+ int idle_frames;
+
+ intel_display_power_set_target_dc_state(dev_priv, DC_STATE_EN_UPTO_DC6);
+ /*
+ * Restore the PSR2 idle frame count; use 6 as the minimum to cover all
+ * known cases, including the off-by-one issue that the HW has in some cases.
+ */
+ idle_frames = max(6, dev_priv->vbt.psr.idle_frames);
+ idle_frames = max(idle_frames, dev_priv->psr.sink_sync_latency + 1);
+ psr2_program_idle_frames(dev_priv, idle_frames);
+}
+
+static void tgl_dc5_idle_thread(struct work_struct *work)
+{
+ struct drm_i915_private *dev_priv =
+ container_of(work, typeof(*dev_priv), psr.idle_work.work);
+
+ mutex_lock(&dev_priv->psr.lock);
+ /* If delayed work is pending, it is not idle */
+ if (delayed_work_pending(&dev_priv->psr.idle_work))
+ goto unlock;
+
+ DRM_DEBUG_KMS("DC5/6 idle thread\n");
+ tgl_psr2_disable_dc3co(dev_priv);
+unlock:
+ mutex_unlock(&dev_priv->psr.lock);
+}
+
+static void tgl_disallow_dc3co_on_psr2_exit(struct drm_i915_private *dev_priv)
+{
+ if (!dev_priv->psr.dc3co_enabled)
+ return;
+
+ cancel_delayed_work(&dev_priv->psr.idle_work);
+ /* Before PSR2 exit, disallow DC3CO */
+ tgl_psr2_disable_dc3co(dev_priv);
+}
+
static bool intel_psr2_config_valid(struct intel_dp *intel_dp,
struct intel_crtc_state *crtc_state)
{
@@ -746,6 +813,8 @@ static void intel_psr_enable_locked(struct drm_i915_private *dev_priv,
dev_priv->psr.psr2_enabled = intel_psr2_enabled(dev_priv, crtc_state);
dev_priv->psr.busy_frontbuffer_bits = 0;
dev_priv->psr.pipe = to_intel_crtc(crtc_state->base.crtc)->pipe;
+ dev_priv->psr.dc3co_enabled = !!crtc_state->dc3co_exitline;
+ dev_priv->psr.dc3co_exit_delay = intel_get_frame_time_us(crtc_state);
dev_priv->psr.transcoder = crtc_state->cpu_transcoder;
/*
@@ -829,6 +898,7 @@ static void intel_psr_exit(struct drm_i915_private *dev_priv)
}
if (dev_priv->psr.psr2_enabled) {
+ tgl_disallow_dc3co_on_psr2_exit(dev_priv);
val = I915_READ(EDP_PSR2_CTL(dev_priv->psr.transcoder));
WARN_ON(!(val & EDP_PSR2_ENABLE));
val &= ~EDP_PSR2_ENABLE;
@@ -901,6 +971,7 @@ void intel_psr_disable(struct intel_dp *intel_dp,
mutex_unlock(&dev_priv->psr.lock);
cancel_work_sync(&dev_priv->psr.work);
+ cancel_delayed_work_sync(&dev_priv->psr.idle_work);
}
static void psr_force_hw_tracking_exit(struct drm_i915_private *dev_priv)
@@ -1208,6 +1279,44 @@ void intel_psr_invalidate(struct drm_i915_private *dev_priv,
mutex_unlock(&dev_priv->psr.lock);
}
+/*
+ * When we completely rely on PSR2 S/W tracking in the future,
+ * intel_psr_flush() will invalidate and flush the PSR for the ORIGIN_FLIP
+ * event as well, so tgl_dc3co_flush() will need to be changed
+ * accordingly in the future.
+ */
+static void
+tgl_dc3co_flush(struct drm_i915_private *dev_priv,
+ unsigned int frontbuffer_bits, enum fb_op_origin origin)
+{
+ u32 delay;
+
+ mutex_lock(&dev_priv->psr.lock);
+
+ if (!dev_priv->psr.dc3co_enabled)
+ goto unlock;
+
+ if (!dev_priv->psr.psr2_enabled || !dev_priv->psr.active)
+ goto unlock;
+
+ /*
+ * At every frontbuffer flush flip event, modify the delay of the delayed
+ * work; when the delayed work finally runs it means the display has been idle.
+ */
+ if (!(frontbuffer_bits &
+ INTEL_FRONTBUFFER_ALL_MASK(dev_priv->psr.pipe)))
+ goto unlock;
+
+ tgl_psr2_enable_dc3co(dev_priv);
+ /* DC5/DC6 required idle frames = 6 */
+ delay = 6 * dev_priv->psr.dc3co_exit_delay;
+ mod_delayed_work(system_wq, &dev_priv->psr.idle_work,
+ usecs_to_jiffies(delay));
+
+unlock:
+ mutex_unlock(&dev_priv->psr.lock);
+}
+
/**
* intel_psr_flush - Flush PSR
* @dev_priv: i915 device
@@ -1227,8 +1336,10 @@ void intel_psr_flush(struct drm_i915_private *dev_priv,
if (!CAN_PSR(dev_priv))
return;
- if (origin == ORIGIN_FLIP)
+ if (origin == ORIGIN_FLIP) {
+ tgl_dc3co_flush(dev_priv, frontbuffer_bits, origin);
return;
+ }
mutex_lock(&dev_priv->psr.lock);
if (!dev_priv->psr.enabled) {
@@ -1284,6 +1395,7 @@ void intel_psr_init(struct drm_i915_private *dev_priv)
dev_priv->psr.link_standby = dev_priv->vbt.psr.full_link;
INIT_WORK(&dev_priv->psr.work, intel_psr_work);
+ INIT_DELAYED_WORK(&dev_priv->psr.idle_work, tgl_dc5_idle_thread);
mutex_init(&dev_priv->psr.lock);
}
diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c
index 633fa8069348..5ae12ab3c5b7 100644
--- a/drivers/gpu/drm/i915/display/intel_sprite.c
+++ b/drivers/gpu/drm/i915/display/intel_sprite.c
@@ -287,10 +287,8 @@ int intel_plane_check_src_coordinates(struct intel_plane_state *plane_state)
src_y = src->y1 >> 16;
src_h = drm_rect_height(src) >> 16;
- src->x1 = src_x << 16;
- src->x2 = (src_x + src_w) << 16;
- src->y1 = src_y << 16;
- src->y2 = (src_y + src_h) << 16;
+ drm_rect_init(src, src_x << 16, src_y << 16,
+ src_w << 16, src_h << 16);
if (!fb->format->is_yuv)
return 0;
diff --git a/drivers/gpu/drm/i915/display/intel_vbt_defs.h b/drivers/gpu/drm/i915/display/intel_vbt_defs.h
index dfcd156b5094..e3045ced4bfe 100644
--- a/drivers/gpu/drm/i915/display/intel_vbt_defs.h
+++ b/drivers/gpu/drm/i915/display/intel_vbt_defs.h
@@ -291,6 +291,8 @@ struct bdb_general_features {
#define DVO_PORT_HDMIE 12 /* 193 */
#define DVO_PORT_DPF 13 /* N/A */
#define DVO_PORT_HDMIF 14 /* N/A */
+#define DVO_PORT_DPG 15
+#define DVO_PORT_HDMIG 16
#define DVO_PORT_MIPIA 21 /* 171 */
#define DVO_PORT_MIPIB 22 /* 171 */
#define DVO_PORT_MIPIC 23 /* 171 */
@@ -325,6 +327,7 @@ enum vbt_gmbus_ddi {
#define DP_AUX_D 0x30
#define DP_AUX_E 0x50
#define DP_AUX_F 0x60
+#define DP_AUX_G 0x70
#define VBT_DP_MAX_LINK_RATE_HBR3 0
#define VBT_DP_MAX_LINK_RATE_HBR2 1
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 5d8221c7ba83..7b01f4605f21 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -203,15 +203,22 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx)
for_each_engine(engine, gt, id) {
struct intel_context *ce;
+ if (engine->legacy_idx == INVALID_ENGINE)
+ continue;
+
+ GEM_BUG_ON(engine->legacy_idx >= I915_NUM_ENGINES);
+ GEM_BUG_ON(e->engines[engine->legacy_idx]);
+
ce = intel_context_create(ctx, engine);
if (IS_ERR(ce)) {
- __free_engines(e, id);
+ __free_engines(e, e->num_engines + 1);
return ERR_CAST(ce);
}
- e->engines[id] = ce;
- e->num_engines = id + 1;
+ e->engines[engine->legacy_idx] = ce;
+ e->num_engines = max(e->num_engines, engine->legacy_idx);
}
+ e->num_engines++;
return e;
}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index 9234586830d1..cfe80590f0ed 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -114,6 +114,24 @@ i915_gem_context_clear_user_engines(struct i915_gem_context *ctx)
clear_bit(CONTEXT_USER_ENGINES, &ctx->flags);
}
+static inline bool
+i915_gem_context_nopreempt(const struct i915_gem_context *ctx)
+{
+ return test_bit(CONTEXT_NOPREEMPT, &ctx->flags);
+}
+
+static inline void
+i915_gem_context_set_nopreempt(struct i915_gem_context *ctx)
+{
+ set_bit(CONTEXT_NOPREEMPT, &ctx->flags);
+}
+
+static inline void
+i915_gem_context_clear_nopreempt(struct i915_gem_context *ctx)
+{
+ clear_bit(CONTEXT_NOPREEMPT, &ctx->flags);
+}
+
static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx)
{
return !ctx->file_priv;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index ab8e1367dfc8..fe97b8ba4fda 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -146,6 +146,7 @@ struct i915_gem_context {
#define CONTEXT_CLOSED 1
#define CONTEXT_FORCE_SINGLE_SUBMISSION 2
#define CONTEXT_USER_ENGINES 3
+#define CONTEXT_NOPREEMPT 4
struct mutex mutex;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 98816c35ffc3..e96901888323 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -2077,6 +2077,9 @@ static int eb_submit(struct i915_execbuffer *eb)
if (err)
return err;
+ if (i915_gem_context_nopreempt(eb->gem_context))
+ eb->request->flags |= I915_REQUEST_NOPREEMPT;
+
return 0;
}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
index 0c41e04ab8fa..5ae694c24df4 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
@@ -117,13 +117,6 @@ create_st:
goto err;
}
- /* Mark the pages as dontneed whilst they are still pinned. As soon
- * as they are unpinned they are allowed to be reaped by the shrinker,
- * and the caller is expected to repopulate - the contents of this
- * object are only valid whilst active and pinned.
- */
- obj->mm.madv = I915_MADV_DONTNEED;
-
__i915_gem_object_set_pages(obj, st, sg_page_sizes);
return 0;
@@ -143,7 +136,6 @@ static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj,
internal_free_pages(pages);
obj->mm.dirty = false;
- obj->mm.madv = I915_MADV_WILLNEED;
}
static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = {
@@ -188,6 +180,15 @@ i915_gem_object_create_internal(struct drm_i915_private *i915,
drm_gem_private_object_init(&i915->drm, &obj->base, size);
i915_gem_object_init(obj, &i915_gem_object_internal_ops);
+ /*
+ * Mark the object as volatile, such that the pages are marked as
+ * dontneed whilst they are still pinned. As soon as they are unpinned
+ * they are allowed to be reaped by the shrinker, and the caller is
+ * expected to repopulate - the contents of this object are only valid
+ * whilst active and pinned.
+ */
+ i915_gem_object_set_volatile(obj);
+
obj->read_domains = I915_GEM_DOMAIN_CPU;
obj->write_domain = I915_GEM_DOMAIN_CPU;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 086a9bf5adcc..85921796851f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -25,10 +25,11 @@ void i915_gem_object_free(struct drm_i915_gem_object *obj);
void i915_gem_object_init(struct drm_i915_gem_object *obj,
const struct drm_i915_gem_object_ops *ops);
struct drm_i915_gem_object *
-i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size);
+i915_gem_object_create_shmem(struct drm_i915_private *i915,
+ resource_size_t size);
struct drm_i915_gem_object *
i915_gem_object_create_shmem_from_data(struct drm_i915_private *i915,
- const void *data, size_t size);
+ const void *data, resource_size_t size);
extern const struct drm_i915_gem_object_ops i915_gem_shmem_ops;
void __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
@@ -140,6 +141,24 @@ i915_gem_object_is_readonly(const struct drm_i915_gem_object *obj)
}
static inline bool
+i915_gem_object_is_contiguous(const struct drm_i915_gem_object *obj)
+{
+ return obj->flags & I915_BO_ALLOC_CONTIGUOUS;
+}
+
+static inline bool
+i915_gem_object_is_volatile(const struct drm_i915_gem_object *obj)
+{
+ return obj->flags & I915_BO_ALLOC_VOLATILE;
+}
+
+static inline void
+i915_gem_object_set_volatile(struct drm_i915_gem_object *obj)
+{
+ obj->flags |= I915_BO_ALLOC_VOLATILE;
+}
+
+static inline bool
i915_gem_object_type_has(const struct drm_i915_gem_object *obj,
unsigned long flags)
{
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index c00b4f077f9e..a387e3ee728b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -119,6 +119,11 @@ struct drm_i915_gem_object {
I915_SELFTEST_DECLARE(struct list_head st_link);
+ unsigned long flags;
+#define I915_BO_ALLOC_CONTIGUOUS BIT(0)
+#define I915_BO_ALLOC_VOLATILE BIT(1)
+#define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | I915_BO_ALLOC_VOLATILE)
+
/*
* Is the object to be mapped as read-only to the GPU
* Only honoured if hardware has relevant pte bit
@@ -160,6 +165,21 @@ struct drm_i915_gem_object {
atomic_t pages_pin_count;
atomic_t shrink_pin;
+ /**
+ * Memory region for this object.
+ */
+ struct intel_memory_region *region;
+ /**
+ * List of memory region blocks allocated for this object.
+ */
+ struct list_head blocks;
+ /**
+ * Element within memory_region->objects or region->purgeable
+ * if the object is marked as DONTNEED. Access is protected by
+ * region->obj_lock.
+ */
+ struct list_head region_link;
+
struct sg_table *pages;
void *mapping;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index 2e941f093a20..b0ec0959c13f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -18,6 +18,9 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
lockdep_assert_held(&obj->mm.lock);
+ if (i915_gem_object_is_volatile(obj))
+ obj->mm.madv = I915_MADV_DONTNEED;
+
/* Make the pages coherent with the GPU (flushing any swapin). */
if (obj->cache_dirty) {
obj->write_domain = 0;
@@ -160,6 +163,9 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
if (IS_ERR_OR_NULL(pages))
return pages;
+ if (i915_gem_object_is_volatile(obj))
+ obj->mm.madv = I915_MADV_WILLNEED;
+
i915_gem_object_make_unshrinkable(obj);
if (obj->mm.mapping) {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
index 768356908160..8043ff63d73f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
@@ -16,6 +16,7 @@
#include "gt/intel_gt.h"
#include "i915_drv.h"
#include "i915_gem_object.h"
+#include "i915_gem_region.h"
#include "i915_scatterlist.h"
static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
@@ -191,8 +192,10 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
/* Perma-pin (until release) the physical set of pages */
__i915_gem_object_pin_pages(obj);
- if (!IS_ERR_OR_NULL(pages))
+ if (!IS_ERR_OR_NULL(pages)) {
i915_gem_shmem_ops.put_pages(obj, pages);
+ i915_gem_object_release_memory_region(obj);
+ }
mutex_unlock(&obj->mm.lock);
return 0;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c
new file mode 100644
index 000000000000..2f7bcfb9c964
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "intel_memory_region.h"
+#include "i915_gem_region.h"
+#include "i915_drv.h"
+#include "i915_trace.h"
+
+void
+i915_gem_object_put_pages_buddy(struct drm_i915_gem_object *obj,
+ struct sg_table *pages)
+{
+ __intel_memory_region_put_pages_buddy(obj->mm.region, &obj->mm.blocks);
+
+ obj->mm.dirty = false;
+ sg_free_table(pages);
+ kfree(pages);
+}
+
+int
+i915_gem_object_get_pages_buddy(struct drm_i915_gem_object *obj)
+{
+ struct intel_memory_region *mem = obj->mm.region;
+ struct list_head *blocks = &obj->mm.blocks;
+ resource_size_t size = obj->base.size;
+ resource_size_t prev_end;
+ struct i915_buddy_block *block;
+ unsigned int flags;
+ struct sg_table *st;
+ struct scatterlist *sg;
+ unsigned int sg_page_sizes;
+ int ret;
+
+ st = kmalloc(sizeof(*st), GFP_KERNEL);
+ if (!st)
+ return -ENOMEM;
+
+ if (sg_alloc_table(st, size >> ilog2(mem->mm.chunk_size), GFP_KERNEL)) {
+ kfree(st);
+ return -ENOMEM;
+ }
+
+ flags = I915_ALLOC_MIN_PAGE_SIZE;
+ if (obj->flags & I915_BO_ALLOC_CONTIGUOUS)
+ flags |= I915_ALLOC_CONTIGUOUS;
+
+ ret = __intel_memory_region_get_pages_buddy(mem, size, flags, blocks);
+ if (ret)
+ goto err_free_sg;
+
+ GEM_BUG_ON(list_empty(blocks));
+
+ sg = st->sgl;
+ st->nents = 0;
+ sg_page_sizes = 0;
+ prev_end = (resource_size_t)-1;
+
+ list_for_each_entry(block, blocks, link) {
+ u64 block_size, offset;
+
+ block_size = min_t(u64, size,
+ i915_buddy_block_size(&mem->mm, block));
+ offset = i915_buddy_block_offset(block);
+
+ GEM_BUG_ON(overflows_type(block_size, sg->length));
+
+ if (offset != prev_end ||
+ add_overflows_t(typeof(sg->length), sg->length, block_size)) {
+ if (st->nents) {
+ sg_page_sizes |= sg->length;
+ sg = __sg_next(sg);
+ }
+
+ sg_dma_address(sg) = mem->region.start + offset;
+ sg_dma_len(sg) = block_size;
+
+ sg->length = block_size;
+
+ st->nents++;
+ } else {
+ sg->length += block_size;
+ sg_dma_len(sg) += block_size;
+ }
+
+ prev_end = offset + block_size;
+ };
+
+ sg_page_sizes |= sg->length;
+ sg_mark_end(sg);
+ i915_sg_trim(st);
+
+ __i915_gem_object_set_pages(obj, st, sg_page_sizes);
+
+ return 0;
+
+err_free_sg:
+ sg_free_table(st);
+ kfree(st);
+ return ret;
+}
+
+void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj,
+ struct intel_memory_region *mem,
+ unsigned long flags)
+{
+ INIT_LIST_HEAD(&obj->mm.blocks);
+ obj->mm.region = intel_memory_region_get(mem);
+ obj->flags |= flags;
+
+ mutex_lock(&mem->objects.lock);
+
+ if (obj->flags & I915_BO_ALLOC_VOLATILE)
+ list_add(&obj->mm.region_link, &mem->objects.purgeable);
+ else
+ list_add(&obj->mm.region_link, &mem->objects.list);
+
+ mutex_unlock(&mem->objects.lock);
+}
+
+void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj)
+{
+ struct intel_memory_region *mem = obj->mm.region;
+
+ mutex_lock(&mem->objects.lock);
+ list_del(&obj->mm.region_link);
+ mutex_unlock(&mem->objects.lock);
+
+ intel_memory_region_put(mem);
+}
+
+struct drm_i915_gem_object *
+i915_gem_object_create_region(struct intel_memory_region *mem,
+ resource_size_t size,
+ unsigned int flags)
+{
+ struct drm_i915_gem_object *obj;
+
+ /*
+ * NB: Our use of resource_size_t for the size stems from using struct
+ * resource for the mem->region. We might need to revisit this in the
+ * future.
+ */
+
+ GEM_BUG_ON(flags & ~I915_BO_ALLOC_FLAGS);
+
+ if (!mem)
+ return ERR_PTR(-ENODEV);
+
+ size = round_up(size, mem->min_page_size);
+
+ GEM_BUG_ON(!size);
+ GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_MIN_ALIGNMENT));
+
+ /*
+ * XXX: There is a prevalence of the assumption that we fit the
+ * object's page count inside a 32bit _signed_ variable. Let's document
+ * this and catch if we ever need to fix it. In the meantime, if you do
+ * spot such a local variable, please consider fixing!
+ */
+
+ if (size >> PAGE_SHIFT > INT_MAX)
+ return ERR_PTR(-E2BIG);
+
+ if (overflows_type(size, obj->base.size))
+ return ERR_PTR(-E2BIG);
+
+ obj = mem->ops->create_object(mem, size, flags);
+ if (!IS_ERR(obj))
+ trace_i915_gem_object_create(obj);
+
+ return obj;
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.h b/drivers/gpu/drm/i915/gem/i915_gem_region.h
new file mode 100644
index 000000000000..f2ff6f8bff74
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_region.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __I915_GEM_REGION_H__
+#define __I915_GEM_REGION_H__
+
+#include <linux/types.h>
+
+struct intel_memory_region;
+struct drm_i915_gem_object;
+struct sg_table;
+
+int i915_gem_object_get_pages_buddy(struct drm_i915_gem_object *obj);
+void i915_gem_object_put_pages_buddy(struct drm_i915_gem_object *obj,
+ struct sg_table *pages);
+
+void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj,
+ struct intel_memory_region *mem,
+ unsigned long flags);
+void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj);
+
+struct drm_i915_gem_object *
+i915_gem_object_create_region(struct intel_memory_region *mem,
+ resource_size_t size,
+ unsigned int flags);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 4c4954e8ce0a..be68b76e13b3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -7,7 +7,9 @@
#include <linux/pagevec.h>
#include <linux/swap.h>
+#include "gem/i915_gem_region.h"
#include "i915_drv.h"
+#include "i915_gemfs.h"
#include "i915_gem_object.h"
#include "i915_scatterlist.h"
#include "i915_trace.h"
@@ -26,6 +28,7 @@ static void check_release_pagevec(struct pagevec *pvec)
static int shmem_get_pages(struct drm_i915_gem_object *obj)
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct intel_memory_region *mem = obj->mm.region;
const unsigned long page_count = obj->base.size / PAGE_SIZE;
unsigned long i;
struct address_space *mapping;
@@ -52,7 +55,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
* If there's no chance of allocating enough pages for the whole
* object, bail early.
*/
- if (page_count > totalram_pages())
+ if (obj->base.size > resource_size(&mem->region))
return -ENOMEM;
st = kmalloc(sizeof(*st), GFP_KERNEL);
@@ -417,6 +420,8 @@ shmem_pwrite(struct drm_i915_gem_object *obj,
static void shmem_release(struct drm_i915_gem_object *obj)
{
+ i915_gem_object_release_memory_region(obj);
+
fput(obj->base.filp);
}
@@ -434,9 +439,9 @@ const struct drm_i915_gem_object_ops i915_gem_shmem_ops = {
.release = shmem_release,
};
-static int create_shmem(struct drm_i915_private *i915,
- struct drm_gem_object *obj,
- size_t size)
+static int __create_shmem(struct drm_i915_private *i915,
+ struct drm_gem_object *obj,
+ resource_size_t size)
{
unsigned long flags = VM_NORESERVE;
struct file *filp;
@@ -455,31 +460,23 @@ static int create_shmem(struct drm_i915_private *i915,
return 0;
}
-struct drm_i915_gem_object *
-i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size)
+static struct drm_i915_gem_object *
+create_shmem(struct intel_memory_region *mem,
+ resource_size_t size,
+ unsigned int flags)
{
+ struct drm_i915_private *i915 = mem->i915;
struct drm_i915_gem_object *obj;
struct address_space *mapping;
unsigned int cache_level;
gfp_t mask;
int ret;
- /* There is a prevalence of the assumption that we fit the object's
- * page count inside a 32bit _signed_ variable. Let's document this and
- * catch if we ever need to fix it. In the meantime, if you do spot
- * such a local variable, please consider fixing!
- */
- if (size >> PAGE_SHIFT > INT_MAX)
- return ERR_PTR(-E2BIG);
-
- if (overflows_type(size, obj->base.size))
- return ERR_PTR(-E2BIG);
-
obj = i915_gem_object_alloc();
if (!obj)
return ERR_PTR(-ENOMEM);
- ret = create_shmem(i915, &obj->base, size);
+ ret = __create_shmem(i915, &obj->base, size);
if (ret)
goto fail;
@@ -518,7 +515,7 @@ i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size)
i915_gem_object_set_cache_coherency(obj, cache_level);
- trace_i915_gem_object_create(obj);
+ i915_gem_object_init_memory_region(obj, mem, 0);
return obj;
@@ -527,14 +524,22 @@ fail:
return ERR_PTR(ret);
}
+struct drm_i915_gem_object *
+i915_gem_object_create_shmem(struct drm_i915_private *i915,
+ resource_size_t size)
+{
+ return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_SMEM],
+ size, 0);
+}
+
/* Allocate a new GEM object and fill it with the supplied data */
struct drm_i915_gem_object *
i915_gem_object_create_shmem_from_data(struct drm_i915_private *dev_priv,
- const void *data, size_t size)
+ const void *data, resource_size_t size)
{
struct drm_i915_gem_object *obj;
struct file *file;
- size_t offset;
+ resource_size_t offset;
int err;
obj = i915_gem_object_create_shmem(dev_priv, round_up(size, PAGE_SIZE));
@@ -577,3 +582,35 @@ fail:
i915_gem_object_put(obj);
return ERR_PTR(err);
}
+
+static int init_shmem(struct intel_memory_region *mem)
+{
+ int err;
+
+ err = i915_gemfs_init(mem->i915);
+ if (err) {
+ DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n",
+ err);
+ }
+
+ return 0; /* Don't error, we can simply fallback to the kernel mnt */
+}
+
+static void release_shmem(struct intel_memory_region *mem)
+{
+ i915_gemfs_fini(mem->i915);
+}
+
+static const struct intel_memory_region_ops shmem_region_ops = {
+ .init = init_shmem,
+ .release = release_shmem,
+ .create_object = create_shmem,
+};
+
+struct intel_memory_region *i915_gem_shmem_setup(struct drm_i915_private *i915)
+{
+ return intel_memory_region_create(i915, 0,
+ totalram_pages() << PAGE_SHIFT,
+ PAGE_SIZE, 0,
+ &shmem_region_ops);
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index c76260ce13e3..57cd8bc2657c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -10,6 +10,7 @@
#include <drm/drm_mm.h>
#include <drm/i915_drm.h>
+#include "gem/i915_gem_region.h"
#include "i915_drv.h"
#include "i915_gem_stolen.h"
@@ -150,7 +151,7 @@ static int i915_adjust_stolen(struct drm_i915_private *dev_priv,
return 0;
}
-void i915_gem_cleanup_stolen(struct drm_i915_private *dev_priv)
+static void i915_gem_cleanup_stolen(struct drm_i915_private *dev_priv)
{
if (!drm_mm_initialized(&dev_priv->mm.stolen))
return;
@@ -355,7 +356,7 @@ static void icl_get_stolen_reserved(struct drm_i915_private *i915,
}
}
-int i915_gem_init_stolen(struct drm_i915_private *dev_priv)
+static int i915_gem_init_stolen(struct drm_i915_private *dev_priv)
{
resource_size_t reserved_base, stolen_top;
resource_size_t reserved_total, reserved_size;
@@ -539,6 +540,9 @@ i915_gem_object_release_stolen(struct drm_i915_gem_object *obj)
i915_gem_stolen_remove_node(dev_priv, stolen);
kfree(stolen);
+
+ if (obj->mm.region)
+ i915_gem_object_release_memory_region(obj);
}
static const struct drm_i915_gem_object_ops i915_gem_object_stolen_ops = {
@@ -548,8 +552,9 @@ static const struct drm_i915_gem_object_ops i915_gem_object_stolen_ops = {
};
static struct drm_i915_gem_object *
-_i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
- struct drm_mm_node *stolen)
+__i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
+ struct drm_mm_node *stolen,
+ struct intel_memory_region *mem)
{
struct drm_i915_gem_object *obj;
unsigned int cache_level;
@@ -571,6 +576,9 @@ _i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
if (err)
goto cleanup;
+ if (mem)
+ i915_gem_object_init_memory_region(obj, mem, 0);
+
return obj;
cleanup:
@@ -579,10 +587,12 @@ err:
return ERR_PTR(err);
}
-struct drm_i915_gem_object *
-i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
- resource_size_t size)
+static struct drm_i915_gem_object *
+_i915_gem_object_create_stolen(struct intel_memory_region *mem,
+ resource_size_t size,
+ unsigned int flags)
{
+ struct drm_i915_private *dev_priv = mem->i915;
struct drm_i915_gem_object *obj;
struct drm_mm_node *stolen;
int ret;
@@ -603,7 +613,7 @@ i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
goto err_free;
}
- obj = _i915_gem_object_create_stolen(dev_priv, stolen);
+ obj = __i915_gem_object_create_stolen(dev_priv, stolen, mem);
if (IS_ERR(obj))
goto err_remove;
@@ -617,6 +627,43 @@ err_free:
}
struct drm_i915_gem_object *
+i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
+ resource_size_t size)
+{
+ return i915_gem_object_create_region(dev_priv->mm.regions[INTEL_REGION_STOLEN],
+ size, I915_BO_ALLOC_CONTIGUOUS);
+}
+
+static int init_stolen(struct intel_memory_region *mem)
+{
+ /*
+ * Initialise stolen early so that we may reserve preallocated
+ * objects for the BIOS to KMS transition.
+ */
+ return i915_gem_init_stolen(mem->i915);
+}
+
+static void release_stolen(struct intel_memory_region *mem)
+{
+ i915_gem_cleanup_stolen(mem->i915);
+}
+
+static const struct intel_memory_region_ops i915_region_stolen_ops = {
+ .init = init_stolen,
+ .release = release_stolen,
+ .create_object = _i915_gem_object_create_stolen,
+};
+
+struct intel_memory_region *i915_gem_stolen_setup(struct drm_i915_private *i915)
+{
+ return intel_memory_region_create(i915,
+ intel_graphics_stolen_res.start,
+ resource_size(&intel_graphics_stolen_res),
+ PAGE_SIZE, 0,
+ &i915_region_stolen_ops);
+}
+
+struct drm_i915_gem_object *
i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv,
resource_size_t stolen_offset,
resource_size_t gtt_offset,
@@ -655,7 +702,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
return ERR_PTR(ret);
}
- obj = _i915_gem_object_create_stolen(dev_priv, stolen);
+ obj = __i915_gem_object_create_stolen(dev_priv, stolen, NULL);
if (IS_ERR(obj)) {
DRM_DEBUG_DRIVER("failed to allocate stolen object\n");
i915_gem_stolen_remove_node(dev_priv, stolen);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h
index 2289644d8604..c1040627fbf3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h
@@ -21,8 +21,7 @@ int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv,
u64 end);
void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv,
struct drm_mm_node *node);
-int i915_gem_init_stolen(struct drm_i915_private *dev_priv);
-void i915_gem_cleanup_stolen(struct drm_i915_private *dev_priv);
+struct intel_memory_region *i915_gem_stolen_setup(struct drm_i915_private *i915);
struct drm_i915_gem_object *
i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
resource_size_t size);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
index dc2a83ce44d5..1fa592d82af5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
@@ -348,9 +348,9 @@ i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
args->stride = 0;
} else {
if (args->tiling_mode == I915_TILING_X)
- args->swizzle_mode = to_i915(dev)->mm.bit_6_swizzle_x;
+ args->swizzle_mode = to_i915(dev)->ggtt.bit_6_swizzle_x;
else
- args->swizzle_mode = to_i915(dev)->mm.bit_6_swizzle_y;
+ args->swizzle_mode = to_i915(dev)->ggtt.bit_6_swizzle_y;
/* Hide bit 17 swizzling from the user. This prevents old Mesa
* from aborting the application on sw fallbacks to bit 17,
@@ -421,10 +421,10 @@ i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
switch (args->tiling_mode) {
case I915_TILING_X:
- args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x;
+ args->swizzle_mode = dev_priv->ggtt.bit_6_swizzle_x;
break;
case I915_TILING_Y:
- args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y;
+ args->swizzle_mode = dev_priv->ggtt.bit_6_swizzle_y;
break;
default:
case I915_TILING_NONE:
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index e42abddd4a36..f27772f6779a 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -8,6 +8,7 @@
#include "i915_selftest.h"
+#include "gem/i915_gem_region.h"
#include "gem/i915_gem_pm.h"
#include "gt/intel_gt.h"
@@ -17,6 +18,7 @@
#include "selftests/mock_drm.h"
#include "selftests/mock_gem_device.h"
+#include "selftests/mock_region.h"
#include "selftests/i915_random.h"
static const unsigned int page_sizes[] = {
@@ -113,8 +115,6 @@ static int get_huge_pages(struct drm_i915_gem_object *obj)
if (i915_gem_gtt_prepare_pages(obj, st))
goto err;
- obj->mm.madv = I915_MADV_DONTNEED;
-
GEM_BUG_ON(sg_page_sizes != obj->mm.page_mask);
__i915_gem_object_set_pages(obj, st, sg_page_sizes);
@@ -135,7 +135,6 @@ static void put_huge_pages(struct drm_i915_gem_object *obj,
huge_pages_free_pages(pages);
obj->mm.dirty = false;
- obj->mm.madv = I915_MADV_WILLNEED;
}
static const struct drm_i915_gem_object_ops huge_page_ops = {
@@ -168,6 +167,8 @@ huge_pages_object(struct drm_i915_private *i915,
drm_gem_private_object_init(&i915->drm, &obj->base, size);
i915_gem_object_init(obj, &huge_page_ops);
+ i915_gem_object_set_volatile(obj);
+
obj->write_domain = I915_GEM_DOMAIN_CPU;
obj->read_domains = I915_GEM_DOMAIN_CPU;
obj->cache_level = I915_CACHE_NONE;
@@ -227,8 +228,6 @@ static int fake_get_huge_pages(struct drm_i915_gem_object *obj)
i915_sg_trim(st);
- obj->mm.madv = I915_MADV_DONTNEED;
-
__i915_gem_object_set_pages(obj, st, sg_page_sizes);
return 0;
@@ -261,8 +260,6 @@ static int fake_get_huge_pages_single(struct drm_i915_gem_object *obj)
sg_dma_len(sg) = obj->base.size;
sg_dma_address(sg) = page_size;
- obj->mm.madv = I915_MADV_DONTNEED;
-
__i915_gem_object_set_pages(obj, st, sg->length);
return 0;
@@ -281,7 +278,6 @@ static void fake_put_huge_pages(struct drm_i915_gem_object *obj,
{
fake_free_huge_pages(obj, pages);
obj->mm.dirty = false;
- obj->mm.madv = I915_MADV_WILLNEED;
}
static const struct drm_i915_gem_object_ops fake_ops = {
@@ -321,6 +317,8 @@ fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single)
else
i915_gem_object_init(obj, &fake_ops);
+ i915_gem_object_set_volatile(obj);
+
obj->write_domain = I915_GEM_DOMAIN_CPU;
obj->read_domains = I915_GEM_DOMAIN_CPU;
obj->cache_level = I915_CACHE_NONE;
@@ -452,6 +450,88 @@ out_device:
return err;
}
+static int igt_mock_memory_region_huge_pages(void *arg)
+{
+ const unsigned int flags[] = { 0, I915_BO_ALLOC_CONTIGUOUS };
+ struct i915_ppgtt *ppgtt = arg;
+ struct drm_i915_private *i915 = ppgtt->vm.i915;
+ unsigned long supported = INTEL_INFO(i915)->page_sizes;
+ struct intel_memory_region *mem;
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ int bit;
+ int err = 0;
+
+ mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0);
+ if (IS_ERR(mem)) {
+ pr_err("%s failed to create memory region\n", __func__);
+ return PTR_ERR(mem);
+ }
+
+ for_each_set_bit(bit, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) {
+ unsigned int page_size = BIT(bit);
+ resource_size_t phys;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(flags); ++i) {
+ obj = i915_gem_object_create_region(mem, page_size,
+ flags[i]);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ goto out_region;
+ }
+
+ vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto out_put;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ if (err)
+ goto out_close;
+
+ err = igt_check_page_sizes(vma);
+ if (err)
+ goto out_unpin;
+
+ phys = i915_gem_object_get_dma_address(obj, 0);
+ if (!IS_ALIGNED(phys, page_size)) {
+ pr_err("%s addr misaligned(%pa) page_size=%u\n",
+ __func__, &phys, page_size);
+ err = -EINVAL;
+ goto out_unpin;
+ }
+
+ if (vma->page_sizes.gtt != page_size) {
+ pr_err("%s page_sizes.gtt=%u, expected=%u\n",
+ __func__, vma->page_sizes.gtt,
+ page_size);
+ err = -EINVAL;
+ goto out_unpin;
+ }
+
+ i915_vma_unpin(vma);
+ i915_vma_close(vma);
+
+ __i915_gem_object_put_pages(obj, I915_MM_NORMAL);
+ i915_gem_object_put(obj);
+ }
+ }
+
+ goto out_region;
+
+out_unpin:
+ i915_vma_unpin(vma);
+out_close:
+ i915_vma_close(vma);
+out_put:
+ i915_gem_object_put(obj);
+out_region:
+ intel_memory_region_put(mem);
+ return err;
+}
+
static int igt_mock_ppgtt_misaligned_dma(void *arg)
{
struct i915_ppgtt *ppgtt = arg;
@@ -1623,6 +1703,7 @@ int i915_gem_huge_page_mock_selftests(void)
{
static const struct i915_subtest tests[] = {
SUBTEST(igt_mock_exhaust_device_supported_pages),
+ SUBTEST(igt_mock_memory_region_huge_pages),
SUBTEST(igt_mock_ppgtt_misaligned_dma),
SUBTEST(igt_mock_ppgtt_huge_fill),
SUBTEST(igt_mock_ppgtt_64K),
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index fb58c0919ea1..e5c235051ae5 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -263,7 +263,7 @@ static int live_parallel_switch(void *arg)
if (!data) {
i915_gem_context_unlock_engines(ctx);
err = -ENOMEM;
- goto out;
+ goto out_file;
}
m = 0; /* Use the first context as our template for the engines */
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index cfa52c525691..65d4dbf91999 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -357,10 +357,10 @@ static int igt_partial_tiling(void *arg)
tile.tiling = tiling;
switch (tiling) {
case I915_TILING_X:
- tile.swizzle = i915->mm.bit_6_swizzle_x;
+ tile.swizzle = i915->ggtt.bit_6_swizzle_x;
break;
case I915_TILING_Y:
- tile.swizzle = i915->mm.bit_6_swizzle_y;
+ tile.swizzle = i915->ggtt.bit_6_swizzle_y;
break;
}
@@ -474,10 +474,10 @@ static int igt_smoke_tiling(void *arg)
break;
case I915_TILING_X:
- tile.swizzle = i915->mm.bit_6_swizzle_x;
+ tile.swizzle = i915->ggtt.bit_6_swizzle_x;
break;
case I915_TILING_Y:
- tile.swizzle = i915->mm.bit_6_swizzle_y;
+ tile.swizzle = i915->ggtt.bit_6_swizzle_y;
break;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index be34d97ac18f..59c3083c1ec1 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -62,7 +62,7 @@ int __intel_context_do_pin(struct intel_context *ce)
}
err = 0;
- with_intel_runtime_pm(&ce->engine->i915->runtime_pm, wakeref)
+ with_intel_runtime_pm(ce->engine->uncore->rpm, wakeref)
err = ce->ops->pin(ce);
if (err)
goto err;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index c9e8c8ccbd47..93ea367fe624 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -136,6 +136,20 @@ execlists_active(const struct intel_engine_execlists *execlists)
return READ_ONCE(*execlists->active);
}
+static inline void
+execlists_active_lock_bh(struct intel_engine_execlists *execlists)
+{
+ local_bh_disable(); /* prevent local softirq and lock recursion */
+ tasklet_lock(&execlists->tasklet);
+}
+
+static inline void
+execlists_active_unlock_bh(struct intel_engine_execlists *execlists)
+{
+ tasklet_unlock(&execlists->tasklet);
+ local_bh_enable(); /* restore softirq, and kick ksoftirqd! */
+}
+
struct i915_request *
execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists);
@@ -407,8 +421,9 @@ static inline void __intel_engine_reset(struct intel_engine_cs *engine,
engine->serial++; /* contexts lost */
}
-bool intel_engine_is_idle(struct intel_engine_cs *engine);
bool intel_engines_are_idle(struct intel_gt *gt);
+bool intel_engine_is_idle(struct intel_engine_cs *engine);
+void intel_engine_flush_submission(struct intel_engine_cs *engine);
void intel_engines_reset_default_submission(struct intel_gt *gt);
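The new execlists_active_lock_bh()/execlists_active_unlock_bh() helpers serialise readers of the inflight ports against the submission tasklet instead of relying on engine->active.lock, as the engine-dump and engine-stats changes below now do. A usage sketch; the dumping function itself is hypothetical:

static void example_dump_inflight(struct intel_engine_cs *engine)
{
	struct intel_engine_execlists *el = &engine->execlists;
	struct i915_request * const *port;
	struct i915_request *rq;

	execlists_active_lock_bh(el);
	for (port = el->active; (rq = *port); port++)
		pr_info("%s: inflight %llx:%lld\n",
			engine->name, rq->fence.context, rq->fence.seqno);
	execlists_active_unlock_bh(el);
}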
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 80fd072ac719..051734c9b733 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -277,6 +277,9 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH));
BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH));
+ if (GEM_DEBUG_WARN_ON(id >= ARRAY_SIZE(gt->engine)))
+ return -EINVAL;
+
if (GEM_DEBUG_WARN_ON(info->class > MAX_ENGINE_CLASS))
return -EINVAL;
@@ -293,6 +296,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
BUILD_BUG_ON(BITS_PER_TYPE(engine->mask) < I915_NUM_ENGINES);
engine->id = id;
+ engine->legacy_idx = INVALID_ENGINE;
engine->mask = BIT(id);
engine->i915 = gt->i915;
engine->gt = gt;
@@ -328,6 +332,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
intel_engine_sanitize_mmio(engine);
gt->engine_class[info->class][info->instance] = engine;
+ gt->engine[id] = engine;
intel_engine_add_user(engine);
gt->i915->engine[id] = engine;
@@ -736,6 +741,7 @@ intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass)
static struct intel_context *
create_kernel_context(struct intel_engine_cs *engine)
{
+ static struct lock_class_key kernel;
struct intel_context *ce;
int err;
@@ -751,6 +757,14 @@ create_kernel_context(struct intel_engine_cs *engine)
return ERR_PTR(err);
}
+ /*
+ * Give our perma-pinned kernel timelines a separate lockdep class,
+ * so that we can use them from within the normal user timelines
+ * should we need to inject GPU operations during their request
+ * construction.
+ */
+ lockdep_set_class(&ce->timeline->mutex, &kernel);
+
return ce;
}
@@ -1040,6 +1054,25 @@ static bool ring_is_idle(struct intel_engine_cs *engine)
return idle;
}
+void intel_engine_flush_submission(struct intel_engine_cs *engine)
+{
+ struct tasklet_struct *t = &engine->execlists.tasklet;
+
+ if (__tasklet_is_scheduled(t)) {
+ local_bh_disable();
+ if (tasklet_trylock(t)) {
+ /* Must wait for any GPU reset in progress. */
+ if (__tasklet_is_enabled(t))
+ t->func(t->data);
+ tasklet_unlock(t);
+ }
+ local_bh_enable();
+ }
+
+ /* Otherwise flush the tasklet if it was running on another cpu */
+ tasklet_unlock_wait(t);
+}
+
/**
 * intel_engine_is_idle() - Report if the engine has finished processing all work
* @engine: the intel_engine_cs
@@ -1058,21 +1091,9 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
/* Waiting to drain ELSP? */
if (execlists_active(&engine->execlists)) {
- struct tasklet_struct *t = &engine->execlists.tasklet;
-
synchronize_hardirq(engine->i915->drm.pdev->irq);
- local_bh_disable();
- if (tasklet_trylock(t)) {
- /* Must wait for any GPU reset in progress. */
- if (__tasklet_is_enabled(t))
- t->func(t->data);
- tasklet_unlock(t);
- }
- local_bh_enable();
-
- /* Otherwise flush the tasklet if it was on another cpu */
- tasklet_unlock_wait(t);
+ intel_engine_flush_submission(engine);
if (execlists_active(&engine->execlists))
return false;
@@ -1102,7 +1123,7 @@ bool intel_engines_are_idle(struct intel_gt *gt)
if (!READ_ONCE(gt->awake))
return true;
- for_each_engine(engine, gt->i915, id) {
+ for_each_engine(engine, gt, id) {
if (!intel_engine_is_idle(engine))
return false;
}
@@ -1115,7 +1136,7 @@ void intel_engines_reset_default_submission(struct intel_gt *gt)
struct intel_engine_cs *engine;
enum intel_engine_id id;
- for_each_engine(engine, gt->i915, id)
+ for_each_engine(engine, gt, id)
engine->set_default_submission(engine);
}
@@ -1225,13 +1246,22 @@ static struct intel_timeline *get_timeline(struct i915_request *rq)
return tl;
}
+static const char *repr_timer(const struct timer_list *t)
+{
+ if (!READ_ONCE(t->expires))
+ return "inactive";
+
+ if (timer_pending(t))
+ return "active";
+
+ return "expired";
+}
+
static void intel_engine_print_registers(struct intel_engine_cs *engine,
struct drm_printer *m)
{
struct drm_i915_private *dev_priv = engine->i915;
- const struct intel_engine_execlists * const execlists =
- &engine->execlists;
- unsigned long flags;
+ struct intel_engine_execlists * const execlists = &engine->execlists;
u64 addr;
if (engine->id == RENDER_CLASS && IS_GEN_RANGE(dev_priv, 4, 7))
@@ -1288,19 +1318,20 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
unsigned int idx;
u8 read, write;
- drm_printf(m, "\tExeclist status: 0x%08x %08x, entries %u\n",
- ENGINE_READ(engine, RING_EXECLIST_STATUS_LO),
- ENGINE_READ(engine, RING_EXECLIST_STATUS_HI),
- num_entries);
+ drm_printf(m, "\tExeclist tasklet queued? %s (%s), timeslice? %s\n",
+ yesno(test_bit(TASKLET_STATE_SCHED,
+ &engine->execlists.tasklet.state)),
+ enableddisabled(!atomic_read(&engine->execlists.tasklet.count)),
+ repr_timer(&engine->execlists.timer));
read = execlists->csb_head;
write = READ_ONCE(*execlists->csb_write);
- drm_printf(m, "\tExeclist CSB read %d, write %d, tasklet queued? %s (%s)\n",
- read, write,
- yesno(test_bit(TASKLET_STATE_SCHED,
- &engine->execlists.tasklet.state)),
- enableddisabled(!atomic_read(&engine->execlists.tasklet.count)));
+ drm_printf(m, "\tExeclist status: 0x%08x %08x; CSB read:%d, write:%d, entries:%d\n",
+ ENGINE_READ(engine, RING_EXECLIST_STATUS_LO),
+ ENGINE_READ(engine, RING_EXECLIST_STATUS_HI),
+ read, write, num_entries);
+
if (read >= num_entries)
read = 0;
if (write >= num_entries)
@@ -1313,7 +1344,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
idx, hws[idx * 2], hws[idx * 2 + 1]);
}
- spin_lock_irqsave(&engine->active.lock, flags);
+ execlists_active_lock_bh(execlists);
for (port = execlists->active; (rq = *port); port++) {
char hdr[80];
int len;
@@ -1351,7 +1382,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
if (tl)
intel_timeline_put(tl);
}
- spin_unlock_irqrestore(&engine->active.lock, flags);
+ execlists_active_unlock_bh(execlists);
} else if (INTEL_GEN(dev_priv) > 6) {
drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
ENGINE_READ(engine, RING_PP_DIR_BASE));
@@ -1458,10 +1489,10 @@ void intel_engine_dump(struct intel_engine_cs *engine,
spin_unlock_irqrestore(&engine->active.lock, flags);
drm_printf(m, "\tMMIO base: 0x%08x\n", engine->mmio_base);
- wakeref = intel_runtime_pm_get_if_in_use(&engine->i915->runtime_pm);
+ wakeref = intel_runtime_pm_get_if_in_use(engine->uncore->rpm);
if (wakeref) {
intel_engine_print_registers(engine, m);
- intel_runtime_pm_put(&engine->i915->runtime_pm, wakeref);
+ intel_runtime_pm_put(engine->uncore->rpm, wakeref);
} else {
drm_printf(m, "\tDevice is asleep; skipping register dump\n");
}
@@ -1493,8 +1524,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
if (!intel_engine_supports_stats(engine))
return -ENODEV;
- spin_lock_irqsave(&engine->active.lock, flags);
- write_seqlock(&engine->stats.lock);
+ execlists_active_lock_bh(execlists);
+ write_seqlock_irqsave(&engine->stats.lock, flags);
if (unlikely(engine->stats.enabled == ~0)) {
err = -EBUSY;
@@ -1522,8 +1553,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
}
unlock:
- write_sequnlock(&engine->stats.lock);
- spin_unlock_irqrestore(&engine->active.lock, flags);
+ write_sequnlock_irqrestore(&engine->stats.lock, flags);
+ execlists_active_unlock_bh(execlists);
return err;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 8e5e513eddc9..67eb6183648a 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -185,7 +185,7 @@ static const struct intel_wakeref_ops wf_ops = {
void intel_engine_init__pm(struct intel_engine_cs *engine)
{
- struct intel_runtime_pm *rpm = &engine->i915->runtime_pm;
+ struct intel_runtime_pm *rpm = engine->uncore->rpm;
intel_wakeref_init(&engine->wakeref, rpm, &wf_ops);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 6199064f332b..3451be034caf 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -148,6 +148,7 @@ enum intel_engine_id {
VECS1,
#define _VECS(n) (VECS0 + (n))
I915_NUM_ENGINES
+#define INVALID_ENGINE ((enum intel_engine_id)-1)
};
struct st_preempt_hang {
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
index 77cd5de83930..7f7150a733f4 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
@@ -160,10 +160,10 @@ static int legacy_ring_idx(const struct legacy_ring *ring)
};
if (GEM_DEBUG_WARN_ON(ring->class >= ARRAY_SIZE(map)))
- return -1;
+ return INVALID_ENGINE;
if (GEM_DEBUG_WARN_ON(ring->instance >= map[ring->class].max))
- return -1;
+ return INVALID_ENGINE;
return map[ring->class].base + ring->instance;
}
@@ -171,23 +171,15 @@ static int legacy_ring_idx(const struct legacy_ring *ring)
static void add_legacy_ring(struct legacy_ring *ring,
struct intel_engine_cs *engine)
{
- int idx;
-
if (engine->gt != ring->gt || engine->class != ring->class) {
ring->gt = engine->gt;
ring->class = engine->class;
ring->instance = 0;
}
- idx = legacy_ring_idx(ring);
- if (unlikely(idx == -1))
- return;
-
- GEM_BUG_ON(idx >= ARRAY_SIZE(ring->gt->engine));
- ring->gt->engine[idx] = engine;
- ring->instance++;
-
- engine->legacy_idx = idx;
+ engine->legacy_idx = legacy_ring_idx(ring);
+ if (engine->legacy_idx != INVALID_ENGINE)
+ ring->instance++;
}
void intel_engines_driver_register(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index b0227ab2fe1b..4294f146f13c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -138,6 +138,7 @@
/* Gen11+. addr = base + (ctx_restore ? offset & GENMASK(12,2) : offset) */
#define MI_LRI_CS_MMIO (1<<19)
#define MI_LRI_FORCE_POSTED (1<<12)
+#define MI_LOAD_REGISTER_IMM_MAX_REGS (126)
#define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1)
#define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2)
#define MI_SRM_LRM_GLOBAL_GTT (1<<22)
@@ -162,7 +163,8 @@
#define MI_BATCH_BUFFER_START MI_INSTR(0x31, 0)
#define MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on gen4 */
#define MI_BATCH_BUFFER_START_GEN8 MI_INSTR(0x31, 1)
-#define MI_BATCH_RESOURCE_STREAMER (1<<10)
+#define MI_BATCH_RESOURCE_STREAMER REG_BIT(10)
+#define MI_BATCH_PREDICATE REG_BIT(15) /* HSW+ on RCS only */
/*
* 3D instructions used by the kernel
@@ -223,6 +225,7 @@
#define PIPE_CONTROL_CS_STALL (1<<20)
#define PIPE_CONTROL_TLB_INVALIDATE (1<<18)
#define PIPE_CONTROL_MEDIA_STATE_CLEAR (1<<16)
+#define PIPE_CONTROL_WRITE_TIMESTAMP (3<<14)
#define PIPE_CONTROL_QW_WRITE (1<<14)
#define PIPE_CONTROL_POST_SYNC_OP_MASK (3<<14)
#define PIPE_CONTROL_DEPTH_STALL (1<<13)
@@ -230,7 +233,9 @@
#define PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH (1<<12) /* gen6+ */
#define PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE (1<<11) /* MBZ on ILK */
#define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1<<10) /* GM45+ only */
+#define PIPE_CONTROL_L3_RO_CACHE_INVALIDATE REG_BIT(10) /* gen12 */
#define PIPE_CONTROL_INDIRECT_STATE_DISABLE (1<<9)
+#define PIPE_CONTROL_HDC_PIPELINE_FLUSH REG_BIT(9) /* gen12 */
#define PIPE_CONTROL_NOTIFY (1<<8)
#define PIPE_CONTROL_FLUSH_ENABLE (1<<7) /* gen7+ */
#define PIPE_CONTROL_DC_FLUSH_ENABLE (1<<5)
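MI_LOAD_REGISTER_IMM_MAX_REGS caps the number of register/value pairs a single LRI packet may carry, which the command-stream OA configuration relies on when emitting long register lists. A sketch of how an emitter might split a list across packets; the helper and the pre-sized cs buffer are assumptions:

static u32 *example_emit_lri(u32 *cs, const i915_reg_t *reg,
			     const u32 *val, unsigned int count)
{
	while (count) {
		unsigned int n = min_t(unsigned int, count,
				       MI_LOAD_REGISTER_IMM_MAX_REGS);

		*cs++ = MI_LOAD_REGISTER_IMM(n);
		count -= n;
		do {
			*cs++ = i915_mmio_reg_offset(*reg++);
			*cs++ = *val++;
		} while (--n);
	}

	return cs;
}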
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 8f44cf8c79b2..1c4b6c9642ad 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -186,7 +186,7 @@ intel_gt_clear_error_registers(struct intel_gt *gt,
struct intel_engine_cs *engine;
enum intel_engine_id id;
- for_each_engine_masked(engine, i915, engine_mask, id)
+ for_each_engine_masked(engine, gt, engine_mask, id)
gen8_clear_engine_error_register(engine);
}
}
@@ -197,7 +197,7 @@ static void gen6_check_faults(struct intel_gt *gt)
enum intel_engine_id id;
u32 fault;
- for_each_engine(engine, gt->i915, id) {
+ for_each_engine(engine, gt, id) {
fault = GEN6_RING_FAULT_REG_READ(engine);
if (fault & RING_FAULT_VALID) {
DRM_DEBUG_DRIVER("Unexpected fault\n"
@@ -273,7 +273,7 @@ void intel_gt_check_and_clear_faults(struct intel_gt *gt)
void intel_gt_flush_ggtt_writes(struct intel_gt *gt)
{
- struct drm_i915_private *i915 = gt->i915;
+ struct intel_uncore *uncore = gt->uncore;
intel_wakeref_t wakeref;
/*
@@ -297,13 +297,12 @@ void intel_gt_flush_ggtt_writes(struct intel_gt *gt)
wmb();
- if (INTEL_INFO(i915)->has_coherent_ggtt)
+ if (INTEL_INFO(gt->i915)->has_coherent_ggtt)
return;
intel_gt_chipset_flush(gt);
- with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
- struct intel_uncore *uncore = gt->uncore;
+ with_intel_runtime_pm(uncore->rpm, wakeref) {
unsigned long flags;
spin_lock_irqsave(&uncore->lock, flags);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index b52e2ba3d092..b866d5b1eee0 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -94,7 +94,7 @@ static const struct intel_wakeref_ops wf_ops = {
void intel_gt_pm_init_early(struct intel_gt *gt)
{
- intel_wakeref_init(&gt->wakeref, &gt->i915->runtime_pm, &wf_ops);
+ intel_wakeref_init(&gt->wakeref, gt->uncore->rpm, &wf_ops);
BLOCKING_INIT_NOTIFIER_HEAD(&gt->pm_notifications);
}
@@ -136,11 +136,18 @@ void intel_gt_sanitize(struct intel_gt *gt, bool force)
intel_uc_sanitize(&gt->uc);
- if (!reset_engines(gt) && !force)
- return;
+ for_each_engine(engine, gt, id)
+ if (engine->reset.prepare)
+ engine->reset.prepare(engine);
- for_each_engine(engine, gt->i915, id)
- __intel_engine_reset(engine, false);
+ if (reset_engines(gt) || force) {
+ for_each_engine(engine, gt, id)
+ __intel_engine_reset(engine, false);
+ }
+
+ for_each_engine(engine, gt, id)
+ if (engine->reset.finish)
+ engine->reset.finish(engine);
}
void intel_gt_pm_disable(struct intel_gt *gt)
@@ -170,7 +177,7 @@ int intel_gt_resume(struct intel_gt *gt)
intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
intel_rc6_sanitize(&gt->rc6);
- for_each_engine(engine, gt->i915, id) {
+ for_each_engine(engine, gt, id) {
struct intel_context *ce;
intel_engine_pm_get(engine);
@@ -222,7 +229,7 @@ void intel_gt_suspend(struct intel_gt *gt)
/* We expect to be idle already; but also want to be independent */
wait_for_idle(gt);
- with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
+ with_intel_runtime_pm(gt->uncore->rpm, wakeref)
intel_rc6_disable(&gt->rc6);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
index 8aed89fd2cdc..b73229a84d85 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
@@ -4,6 +4,7 @@
* Copyright © 2019 Intel Corporation
*/
+#include "i915_drv.h" /* for_each_engine() */
#include "i915_request.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
@@ -19,6 +20,15 @@ static void retire_requests(struct intel_timeline *tl)
break;
}
+static void flush_submission(struct intel_gt *gt)
+{
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ for_each_engine(engine, gt, id)
+ intel_engine_flush_submission(engine);
+}
+
long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
{
struct intel_gt_timelines *timelines = &gt->timelines;
@@ -32,10 +42,14 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
if (unlikely(timeout < 0))
timeout = -timeout, interruptible = false;
+ flush_submission(gt); /* kick the ksoftirqd tasklets */
+
spin_lock_irqsave(&timelines->lock, flags);
list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
- if (!mutex_trylock(&tl->mutex))
+ if (!mutex_trylock(&tl->mutex)) {
+ active_count++; /* report busy to caller, try again? */
continue;
+ }
intel_timeline_get(tl);
GEM_BUG_ON(!tl->active_count);
@@ -48,7 +62,7 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
fence = i915_active_fence_get(&tl->last_request);
if (fence) {
timeout = dma_fence_wait_timeout(fence,
- true,
+ interruptible,
timeout);
dma_fence_put(fence);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index 802f516a3430..ae4aaf75ac78 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -17,6 +17,7 @@
#include "i915_vma.h"
#include "intel_engine_types.h"
+#include "intel_llc_types.h"
#include "intel_reset_types.h"
#include "intel_rc6_types.h"
#include "intel_wakeref.h"
@@ -79,6 +80,7 @@ struct intel_gt {
*/
intel_wakeref_t awake;
+ struct intel_llc llc;
struct intel_rc6 rc6;
struct blocking_notifier_head pm_notifications;
@@ -109,6 +111,11 @@ enum intel_gt_scratch_field {
/* 8 bytes */
INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA = 256,
+ /* 6 * 8 bytes */
+ INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR = 2048,
+
+ /* 4 bytes */
+ INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1 = 2096,
};
#endif /* __INTEL_GT_TYPES_H__ */
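The two new perf scratch fields sit back to back in the GT scratch page; the layout below is derived from the offsets and size comments above and is only illustrative:

/*
 *   2048 .. 2095  INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR              (6 * 8 bytes)
 *   2096 .. 2099  INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1  (4 bytes)
 */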
diff --git a/drivers/gpu/drm/i915/gt/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/intel_hangcheck.c
index 9814b18b32ad..0fdef00af9e4 100644
--- a/drivers/gpu/drm/i915/gt/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/intel_hangcheck.c
@@ -237,7 +237,7 @@ static void hangcheck_declare_hang(struct intel_gt *gt,
hung &= ~stuck;
len = scnprintf(msg, sizeof(msg),
"%s on ", stuck == hung ? "no progress" : "hang");
- for_each_engine_masked(engine, gt->i915, hung, tmp)
+ for_each_engine_masked(engine, gt, hung, tmp)
len += scnprintf(msg + len, sizeof(msg) - len,
"%s, ", engine->name);
msg[len-2] = '\0';
@@ -271,7 +271,7 @@ static void hangcheck_elapsed(struct work_struct *work)
if (intel_gt_is_wedged(gt))
return;
- wakeref = intel_runtime_pm_get_if_in_use(&gt->i915->runtime_pm);
+ wakeref = intel_runtime_pm_get_if_in_use(gt->uncore->rpm);
if (!wakeref)
return;
@@ -281,7 +281,7 @@ static void hangcheck_elapsed(struct work_struct *work)
*/
intel_uncore_arm_unclaimed_mmio_detection(gt->uncore);
- for_each_engine(engine, gt->i915, id) {
+ for_each_engine(engine, gt, id) {
struct hangcheck hc;
intel_engine_breadcrumbs_irq(engine);
@@ -303,7 +303,7 @@ static void hangcheck_elapsed(struct work_struct *work)
if (GEM_SHOW_DEBUG() && (hung | stuck)) {
struct drm_printer p = drm_debug_printer("hangcheck");
- for_each_engine(engine, gt->i915, id) {
+ for_each_engine(engine, gt, id) {
if (intel_engine_is_idle(engine))
continue;
@@ -322,7 +322,7 @@ static void hangcheck_elapsed(struct work_struct *work)
if (hung)
hangcheck_declare_hang(gt, hung, stuck);
- intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
+ intel_runtime_pm_put(gt->uncore->rpm, wakeref);
/* Reset timer in case GPU hangs without another request being added */
intel_gt_queue_hangcheck(gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_llc.c b/drivers/gpu/drm/i915/gt/intel_llc.c
new file mode 100644
index 000000000000..35093eb5f24e
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_llc.c
@@ -0,0 +1,161 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/cpufreq.h>
+
+#include "i915_drv.h"
+#include "intel_gt.h"
+#include "intel_llc.h"
+#include "intel_sideband.h"
+
+struct ia_constants {
+ unsigned int min_gpu_freq;
+ unsigned int max_gpu_freq;
+
+ unsigned int min_ring_freq;
+ unsigned int max_ia_freq;
+};
+
+static struct intel_gt *llc_to_gt(struct intel_llc *llc)
+{
+ return container_of(llc, struct intel_gt, llc);
+}
+
+static unsigned int cpu_max_MHz(void)
+{
+ struct cpufreq_policy *policy;
+ unsigned int max_khz;
+
+ policy = cpufreq_cpu_get(0);
+ if (policy) {
+ max_khz = policy->cpuinfo.max_freq;
+ cpufreq_cpu_put(policy);
+ } else {
+ /*
+ * Default to measured freq if none found, PCU will ensure we
+ * don't go over
+ */
+ max_khz = tsc_khz;
+ }
+
+ return max_khz / 1000;
+}
+
+static bool get_ia_constants(struct intel_llc *llc,
+ struct ia_constants *consts)
+{
+ struct drm_i915_private *i915 = llc_to_gt(llc)->i915;
+ struct intel_rps *rps = &i915->gt_pm.rps;
+
+ if (rps->max_freq <= rps->min_freq)
+ return false;
+
+ consts->max_ia_freq = cpu_max_MHz();
+
+ consts->min_ring_freq =
+ intel_uncore_read(llc_to_gt(llc)->uncore, DCLK) & 0xf;
+ /* convert DDR frequency from units of 266.6MHz to bandwidth */
+ consts->min_ring_freq = mult_frac(consts->min_ring_freq, 8, 3);
+
+ consts->min_gpu_freq = rps->min_freq;
+ consts->max_gpu_freq = rps->max_freq;
+ if (INTEL_GEN(i915) >= 9) {
+ /* Convert GT frequency to 50 MHz units */
+ consts->min_gpu_freq /= GEN9_FREQ_SCALER;
+ consts->max_gpu_freq /= GEN9_FREQ_SCALER;
+ }
+
+ return true;
+}
+
+static void calc_ia_freq(struct intel_llc *llc,
+ unsigned int gpu_freq,
+ const struct ia_constants *consts,
+ unsigned int *out_ia_freq,
+ unsigned int *out_ring_freq)
+{
+ struct drm_i915_private *i915 = llc_to_gt(llc)->i915;
+ const int diff = consts->max_gpu_freq - gpu_freq;
+ unsigned int ia_freq = 0, ring_freq = 0;
+
+ if (INTEL_GEN(i915) >= 9) {
+ /*
+ * ring_freq = 2 * GT. ring_freq is in 100MHz units
+ * No floor required for ring frequency on SKL.
+ */
+ ring_freq = gpu_freq;
+ } else if (INTEL_GEN(i915) >= 8) {
+ /* max(2 * GT, DDR). NB: GT is 50MHz units */
+ ring_freq = max(consts->min_ring_freq, gpu_freq);
+ } else if (IS_HASWELL(i915)) {
+ ring_freq = mult_frac(gpu_freq, 5, 4);
+ ring_freq = max(consts->min_ring_freq, ring_freq);
+ /* leave ia_freq as the default, chosen by cpufreq */
+ } else {
+ const int min_freq = 15;
+ const int scale = 180;
+
+ /*
+ * On older processors, there is no separate ring
+ * clock domain, so in order to boost the bandwidth
+ * of the ring, we need to upclock the CPU (ia_freq).
+ *
+ * For GPU frequencies less than 750MHz,
+ * just use the lowest ring freq.
+ */
+ if (gpu_freq < min_freq)
+ ia_freq = 800;
+ else
+ ia_freq = consts->max_ia_freq - diff * scale / 2;
+ ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
+ }
+
+ *out_ia_freq = ia_freq;
+ *out_ring_freq = ring_freq;
+}
+
+static void gen6_update_ring_freq(struct intel_llc *llc)
+{
+ struct drm_i915_private *i915 = llc_to_gt(llc)->i915;
+ struct ia_constants consts;
+ unsigned int gpu_freq;
+
+ if (!get_ia_constants(llc, &consts))
+ return;
+
+ /*
+ * For each potential GPU frequency, load a ring frequency we'd like
+ * to use for memory access. We do this by specifying the IA frequency
+ * the PCU should use as a reference to determine the ring frequency.
+ */
+ for (gpu_freq = consts.max_gpu_freq;
+ gpu_freq >= consts.min_gpu_freq;
+ gpu_freq--) {
+ unsigned int ia_freq, ring_freq;
+
+ calc_ia_freq(llc, gpu_freq, &consts, &ia_freq, &ring_freq);
+ sandybridge_pcode_write(i915,
+ GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
+ ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT |
+ ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT |
+ gpu_freq);
+ }
+}
+
+void intel_llc_enable(struct intel_llc *llc)
+{
+ if (HAS_LLC(llc_to_gt(llc)->i915))
+ gen6_update_ring_freq(llc);
+}
+
+void intel_llc_disable(struct intel_llc *llc)
+{
+ /* Currently there is no HW configuration needed to disable this. */
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_llc.c"
+#endif
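A worked example of the pre-Haswell branch of calc_ia_freq(), using assumed values (max_ia_freq = 3400 MHz, max_gpu_freq = 22 and gpu_freq = 16, the GT frequencies in 50 MHz units):

/*
 *   diff    = max_gpu_freq - gpu_freq        = 22 - 16            = 6
 *   ia_freq = max_ia_freq - diff * scale / 2 = 3400 - 6 * 180 / 2 = 2860 MHz
 *   ia_freq = DIV_ROUND_CLOSEST(2860, 100)                        = 29 (100 MHz units)
 *
 * gen6_update_ring_freq() then packs ia_freq, ring_freq and gpu_freq into
 * a single GEN6_PCODE_WRITE_MIN_FREQ_TABLE write for each table entry.
 */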
diff --git a/drivers/gpu/drm/i915/gt/intel_llc.h b/drivers/gpu/drm/i915/gt/intel_llc.h
new file mode 100644
index 000000000000..ef09a890d2b7
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_llc.h
@@ -0,0 +1,15 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_LLC_H
+#define INTEL_LLC_H
+
+struct intel_llc;
+
+void intel_llc_enable(struct intel_llc *llc);
+void intel_llc_disable(struct intel_llc *llc);
+
+#endif /* INTEL_LLC_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_llc_types.h b/drivers/gpu/drm/i915/gt/intel_llc_types.h
new file mode 100644
index 000000000000..ecad4687b930
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_llc_types.h
@@ -0,0 +1,13 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_LLC_TYPES_H
+#define INTEL_LLC_TYPES_H
+
+struct intel_llc {
+};
+
+#endif /* INTEL_LLC_TYPES_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 468438fb47af..d0088d020220 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -247,8 +247,12 @@ static void __context_pin_release(struct intel_context *ce)
static void mark_eio(struct i915_request *rq)
{
- if (!i915_request_signaled(rq))
- dma_fence_set_error(&rq->fence, -EIO);
+ if (i915_request_completed(rq))
+ return;
+
+ GEM_BUG_ON(i915_request_signaled(rq));
+
+ dma_fence_set_error(&rq->fence, -EIO);
i915_request_mark_complete(rq);
}
@@ -348,10 +352,15 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
* However, the priority hint is a mere hint that we may need to
* preempt. If that hint is stale or we may be trying to preempt
* ourselves, ignore the request.
+ *
+ * More naturally we would write
+ * prio >= max(0, last);
+ * except that we wish to prevent triggering preemption at the same
+ * priority level: the task that is running should remain running
+ * to preserve FIFO ordering of dependencies.
*/
- last_prio = effective_prio(rq);
- if (!i915_scheduler_need_preempt(engine->execlists.queue_priority_hint,
- last_prio))
+ last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1);
+ if (engine->execlists.queue_priority_hint <= last_prio)
return false;
/*
@@ -669,64 +678,6 @@ static const u8 gen12_xcs_offsets[] = {
REG16(0x274),
REG16(0x270),
- NOP(13),
- LRI(2, POSTED),
- REG16(0x200),
- REG16(0x204),
-
- NOP(11),
- LRI(50, POSTED),
- REG16(0x588),
- REG16(0x588),
- REG16(0x588),
- REG16(0x588),
- REG16(0x588),
- REG16(0x588),
- REG(0x028),
- REG(0x09c),
- REG(0x0c0),
- REG(0x178),
- REG(0x17c),
- REG16(0x358),
- REG(0x170),
- REG(0x150),
- REG(0x154),
- REG(0x158),
- REG16(0x41c),
- REG16(0x600),
- REG16(0x604),
- REG16(0x608),
- REG16(0x60c),
- REG16(0x610),
- REG16(0x614),
- REG16(0x618),
- REG16(0x61c),
- REG16(0x620),
- REG16(0x624),
- REG16(0x628),
- REG16(0x62c),
- REG16(0x630),
- REG16(0x634),
- REG16(0x638),
- REG16(0x63c),
- REG16(0x640),
- REG16(0x644),
- REG16(0x648),
- REG16(0x64c),
- REG16(0x650),
- REG16(0x654),
- REG16(0x658),
- REG16(0x65c),
- REG16(0x660),
- REG16(0x664),
- REG16(0x668),
- REG16(0x66c),
- REG16(0x670),
- REG16(0x674),
- REG16(0x678),
- REG16(0x67c),
- REG(0x068),
-
END(),
};
@@ -857,6 +808,15 @@ static const u8 gen12_rcs_offsets[] = {
static const u8 *reg_offsets(const struct intel_engine_cs *engine)
{
+ /*
+ * The gen12+ lists only have the registers we program in the basic
+ * default state. We rely on the context image using relative
+ * addressing to automatically fix up the register state between the
+ * physical engines of a virtual engine.
+ */
+ GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 &&
+ !intel_engine_has_relative_mmio(engine));
+
if (engine->class == RENDER_CLASS) {
if (INTEL_GEN(engine->i915) >= 12)
return gen12_rcs_offsets;
@@ -892,7 +852,6 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
list_for_each_entry_safe_reverse(rq, rn,
&engine->active.requests,
sched.link) {
- struct intel_engine_cs *owner;
if (i915_request_completed(rq))
continue; /* XXX */
@@ -907,8 +866,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
* engine so that it can be moved across onto another physical
* engine as load dictates.
*/
- owner = rq->hw_context->engine;
- if (likely(owner == engine)) {
+ if (likely(rq->execution_mask == engine->mask)) {
GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
if (rq_prio(rq) != prio) {
prio = rq_prio(rq);
@@ -919,6 +877,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
list_move(&rq->sched.link, pl);
active = rq;
} else {
+ struct intel_engine_cs *owner = rq->hw_context->engine;
+
/*
* Decouple the virtual breadcrumb before moving it
* back to the virtual engine -- we don't want the
@@ -928,7 +888,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
*/
if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
&rq->fence.flags)) {
- spin_lock(&rq->lock);
+ spin_lock_nested(&rq->lock,
+ SINGLE_DEPTH_NESTING);
i915_request_cancel_breadcrumb(rq);
spin_unlock(&rq->lock);
}
@@ -1092,6 +1053,10 @@ static u64 execlists_update_context(const struct i915_request *rq)
desc = ce->lrc_desc;
ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
+ /* Wa_1607138340:tgl */
+ if (IS_TGL_REVID(rq->i915, TGL_REVID_A0, TGL_REVID_A0))
+ desc |= CTX_DESC_FORCE_RESTORE;
+
return desc;
}
@@ -1137,25 +1102,45 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
trace_ports(execlists, msg, execlists->pending);
- if (!execlists->pending[0])
+ if (!execlists->pending[0]) {
+ GEM_TRACE_ERR("Nothing pending for promotion!\n");
return false;
+ }
- if (execlists->pending[execlists_num_ports(execlists)])
+ if (execlists->pending[execlists_num_ports(execlists)]) {
+ GEM_TRACE_ERR("Excess pending[%d] for promotion!\n",
+ execlists_num_ports(execlists));
return false;
+ }
for (port = execlists->pending; (rq = *port); port++) {
- if (ce == rq->hw_context)
+ if (ce == rq->hw_context) {
+ GEM_TRACE_ERR("Duplicate context in pending[%zd]\n",
+ port - execlists->pending);
return false;
+ }
ce = rq->hw_context;
if (i915_request_completed(rq))
continue;
- if (i915_active_is_idle(&ce->active))
+ if (i915_active_is_idle(&ce->active)) {
+ GEM_TRACE_ERR("Inactive context in pending[%zd]\n",
+ port - execlists->pending);
return false;
+ }
+
+ if (!i915_vma_is_pinned(ce->state)) {
+ GEM_TRACE_ERR("Unpinned context in pending[%zd]\n",
+ port - execlists->pending);
+ return false;
+ }
- if (!i915_vma_is_pinned(ce->state))
+ if (!i915_vma_is_pinned(ce->ring->vma)) {
+ GEM_TRACE_ERR("Unpinned ringbuffer in pending[%zd]\n",
+ port - execlists->pending);
return false;
+ }
}
return ce;
@@ -1232,6 +1217,10 @@ static bool can_merge_rq(const struct i915_request *prev,
if (i915_request_completed(next))
return true;
+ if (unlikely((prev->flags ^ next->flags) &
+ (I915_REQUEST_NOPREEMPT | I915_REQUEST_SENTINEL)))
+ return false;
+
if (!can_merge_ctx(prev->hw_context, next->hw_context))
return false;
@@ -1288,7 +1277,7 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
static struct i915_request *
last_active(const struct intel_engine_execlists *execlists)
{
- struct i915_request * const *last = execlists->active;
+ struct i915_request * const *last = READ_ONCE(execlists->active);
while (*last && i915_request_completed(*last))
last++;
@@ -1489,7 +1478,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
last->hw_context->lrc_desc |= CTX_DESC_FORCE_RESTORE;
last = NULL;
} else if (need_timeslice(engine, last) &&
- !timer_pending(&engine->execlists.timer)) {
+ timer_expired(&engine->execlists.timer)) {
GEM_TRACE("%s: expired last=%llx:%lld, prio=%d, hint=%d\n",
engine->name,
last->fence.context,
@@ -1525,8 +1514,17 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* submission.
*/
if (!list_is_last(&last->sched.link,
- &engine->active.requests))
+ &engine->active.requests)) {
+ /*
+ * Even if ELSP[1] is occupied and not worthy
+ * of timeslices, our queue might be.
+ */
+ if (!execlists->timer.expires &&
+ need_timeslice(engine, last))
+ mod_timer(&execlists->timer,
+ jiffies + 1);
return;
+ }
/*
* WaIdleLiteRestore:bdw,skl
@@ -1680,6 +1678,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
if (last->hw_context == rq->hw_context)
goto done;
+ if (i915_request_has_sentinel(last))
+ goto done;
+
/*
* If GVT overrides us we only ever submit
* port[0], leaving port[1] empty. Note that we
@@ -1859,6 +1860,13 @@ static void process_csb(struct intel_engine_cs *engine)
const u8 num_entries = execlists->csb_size;
u8 head, tail;
+ /*
+ * As we modify our execlists state tracking we require exclusive
+ * access. Either we are inside the tasklet, or the tasklet is disabled
+ * and we assume that only happens inside the reset paths and is thus serialised.
+ */
+ GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) &&
+ !reset_in_progress(execlists));
GEM_BUG_ON(USES_GUC_SUBMISSION(engine->i915));
/*
@@ -1920,6 +1928,9 @@ static void process_csb(struct intel_engine_cs *engine)
else
promote = gen8_csb_parse(execlists, buf + 2 * head);
if (promote) {
+ if (!inject_preempt_hang(execlists))
+ ring_set_paused(engine, 0);
+
/* cancel old inflight, prepare for switch */
trace_ports(execlists, "preempted", execlists->active);
while (*execlists->active)
@@ -1935,9 +1946,8 @@ static void process_csb(struct intel_engine_cs *engine)
if (enable_timeslice(execlists))
mod_timer(&execlists->timer, jiffies + 1);
-
- if (!inject_preempt_hang(execlists))
- ring_set_paused(engine, 0);
+ else
+ cancel_timer(&execlists->timer);
WRITE_ONCE(execlists->pending[0], NULL);
} else {
@@ -1980,8 +1990,11 @@ static void process_csb(struct intel_engine_cs *engine)
static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
{
lockdep_assert_held(&engine->active.lock);
- if (!engine->execlists.pending[0])
+ if (!engine->execlists.pending[0]) {
+ rcu_read_lock(); /* protect peeking at execlists->active */
execlists_dequeue(engine);
+ rcu_read_unlock();
+ }
}
/*
@@ -2773,8 +2786,10 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
if (!rq)
goto unwind;
+ /* We still have requests in-flight; the engine should be active */
+ GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
+
ce = rq->hw_context;
- GEM_BUG_ON(i915_active_is_idle(&ce->active));
GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
/* Proclaim we have exclusive access to the context image! */
@@ -2782,10 +2797,13 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
rq = active_request(rq);
if (!rq) {
+ /* Idle context; tidy up the ring so we can restart afresh */
ce->ring->head = ce->ring->tail;
goto out_replay;
}
+ /* Context has requests still in-flight; it should not be idle! */
+ GEM_BUG_ON(i915_active_is_idle(&ce->active));
ce->ring->head = intel_ring_wrap(ce->ring, rq->head);
/*
@@ -3205,8 +3223,11 @@ static int gen12_emit_flush_render(struct i915_request *request,
flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+ /* Wa_1409600907:tgl */
+ flags |= PIPE_CONTROL_DEPTH_STALL;
flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
flags |= PIPE_CONTROL_FLUSH_ENABLE;
+ flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH;
flags |= PIPE_CONTROL_STORE_DATA_INDEX;
flags |= PIPE_CONTROL_QW_WRITE;
@@ -3232,6 +3253,7 @@ static int gen12_emit_flush_render(struct i915_request *request,
flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_L3_RO_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_STORE_DATA_INDEX;
flags |= PIPE_CONTROL_QW_WRITE;
@@ -3253,6 +3275,26 @@ static int gen12_emit_flush_render(struct i915_request *request,
*cs++ = preparser_disable(false);
intel_ring_advance(request, cs);
+
+ /*
+ * Wa_1604544889:tgl
+ */
+ if (IS_TGL_REVID(request->i915, TGL_REVID_A0, TGL_REVID_A0)) {
+ flags = 0;
+ flags |= PIPE_CONTROL_CS_STALL;
+ flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH;
+
+ flags |= PIPE_CONTROL_STORE_DATA_INDEX;
+ flags |= PIPE_CONTROL_QW_WRITE;
+
+ cs = intel_ring_begin(request, 6);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ cs = gen8_emit_pipe_control(cs, flags,
+ LRC_PPHWSP_SCRATCH_ADDR);
+ intel_ring_advance(request, cs);
+ }
}
return 0;
@@ -3415,15 +3457,18 @@ gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
PIPE_CONTROL_TILE_CACHE_FLUSH |
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ /* Wa_1409600907:tgl */
+ PIPE_CONTROL_DEPTH_STALL |
PIPE_CONTROL_DC_FLUSH_ENABLE |
- PIPE_CONTROL_FLUSH_ENABLE);
+ PIPE_CONTROL_FLUSH_ENABLE |
+ PIPE_CONTROL_HDC_PIPELINE_FLUSH);
return gen12_emit_fini_breadcrumb_footer(request, cs);
}
static void execlists_park(struct intel_engine_cs *engine)
{
- del_timer(&engine->execlists.timer);
+ cancel_timer(&engine->execlists.timer);
}
void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
@@ -3447,7 +3492,7 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
engine->flags |= I915_ENGINE_HAS_PREEMPTION;
}
- if (engine->class != COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) >= 12)
+ if (INTEL_GEN(engine->i915) >= 12)
engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO;
}
@@ -4169,6 +4214,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
ve->base.i915 = ctx->i915;
ve->base.gt = siblings[0]->gt;
+ ve->base.uncore = siblings[0]->uncore;
ve->base.id = -1;
ve->base.class = OTHER_CLASS;
ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
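The reworked need_preempt() check clamps the running request's priority to just below I915_PRIORITY_NORMAL and only preempts when the queue hint strictly exceeds it, so equal-priority work keeps its FIFO order. An illustration with assumed priority values (I915_PRIORITY_NORMAL = 0):

/*
 *   last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1)
 *
 *   running prio   queue_priority_hint   passes the check?
 *        -2                -2            no  (-2 <= max(-2, -1) = -1)
 *        -2                 0            yes ( 0 >  -1)
 *         0                 0            no  ( 0 <=  0)
 *         0                 1            yes ( 1 >   0)
 *
 * "yes" only means the hint is high enough to consider preemption; the
 * remaining checks in need_preempt() still apply.
 */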
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 728704bbbe18..5bac3966906b 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -287,10 +287,9 @@ static const struct drm_i915_mocs_entry icelake_mocs_table[] = {
GEN11_MOCS_ENTRIES
};
-static bool get_mocs_settings(struct intel_gt *gt,
+static bool get_mocs_settings(const struct drm_i915_private *i915,
struct drm_i915_mocs_table *table)
{
- struct drm_i915_private *i915 = gt->i915;
bool result = false;
if (INTEL_GEN(i915) >= 12) {
@@ -331,9 +330,9 @@ static bool get_mocs_settings(struct intel_gt *gt,
return result;
}
-static i915_reg_t mocs_register(enum intel_engine_id engine_id, int index)
+static i915_reg_t mocs_register(const struct intel_engine_cs *engine, int index)
{
- switch (engine_id) {
+ switch (engine->id) {
case RCS0:
return GEN9_GFX_MOCS(index);
case VCS0:
@@ -347,7 +346,7 @@ static i915_reg_t mocs_register(enum intel_engine_id engine_id, int index)
case VCS2:
return GEN11_MFX2_MOCS(index);
default:
- MISSING_CASE(engine_id);
+ MISSING_CASE(engine->id);
return INVALID_MMIO_REG;
}
}
@@ -365,118 +364,25 @@ static u32 get_entry_control(const struct drm_i915_mocs_table *table,
return table->table[I915_MOCS_PTE].control_value;
}
-/**
- * intel_mocs_init_engine() - emit the mocs control table
- * @engine: The engine for whom to emit the registers.
- *
- * This function simply emits a MI_LOAD_REGISTER_IMM command for the
- * given table starting at the given address.
- */
-void intel_mocs_init_engine(struct intel_engine_cs *engine)
+static void init_mocs_table(struct intel_engine_cs *engine,
+ const struct drm_i915_mocs_table *table)
{
- struct intel_gt *gt = engine->gt;
- struct intel_uncore *uncore = gt->uncore;
- struct drm_i915_mocs_table table;
- unsigned int index;
- u32 unused_value;
-
- /* Platforms with global MOCS do not need per-engine initialization. */
- if (HAS_GLOBAL_MOCS_REGISTERS(gt->i915))
- return;
-
- /* Called under a blanket forcewake */
- assert_forcewakes_active(uncore, FORCEWAKE_ALL);
-
- if (!get_mocs_settings(gt, &table))
- return;
-
- /* Set unused values to PTE */
- unused_value = table.table[I915_MOCS_PTE].control_value;
-
- for (index = 0; index < table.size; index++) {
- u32 value = get_entry_control(&table, index);
+ struct intel_uncore *uncore = engine->uncore;
+ u32 unused_value = table->table[I915_MOCS_PTE].control_value;
+ unsigned int i;
+ for (i = 0; i < table->size; i++)
intel_uncore_write_fw(uncore,
- mocs_register(engine->id, index),
- value);
- }
+ mocs_register(engine, i),
+ get_entry_control(table, i));
- /* All remaining entries are also unused */
- for (; index < table.n_entries; index++)
+ /* All remaining entries are unused */
+ for (; i < table->n_entries; i++)
intel_uncore_write_fw(uncore,
- mocs_register(engine->id, index),
+ mocs_register(engine, i),
unused_value);
}
-static void intel_mocs_init_global(struct intel_gt *gt)
-{
- struct intel_uncore *uncore = gt->uncore;
- struct drm_i915_mocs_table table;
- unsigned int index;
-
- GEM_BUG_ON(!HAS_GLOBAL_MOCS_REGISTERS(gt->i915));
-
- if (!get_mocs_settings(gt, &table))
- return;
-
- if (GEM_DEBUG_WARN_ON(table.size > table.n_entries))
- return;
-
- for (index = 0; index < table.size; index++)
- intel_uncore_write(uncore,
- GEN12_GLOBAL_MOCS(index),
- table.table[index].control_value);
-
- /*
- * Ok, now set the unused entries to the invalid entry (index 0). These
- * entries are officially undefined and no contract for the contents and
- * settings is given for these entries.
- */
- for (; index < table.n_entries; index++)
- intel_uncore_write(uncore,
- GEN12_GLOBAL_MOCS(index),
- table.table[0].control_value);
-}
-
-static int emit_mocs_control_table(struct i915_request *rq,
- const struct drm_i915_mocs_table *table)
-{
- enum intel_engine_id engine = rq->engine->id;
- unsigned int index;
- u32 unused_value;
- u32 *cs;
-
- if (GEM_WARN_ON(table->size > table->n_entries))
- return -ENODEV;
-
- /* Set unused values to PTE */
- unused_value = table->table[I915_MOCS_PTE].control_value;
-
- cs = intel_ring_begin(rq, 2 + 2 * table->n_entries);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries);
-
- for (index = 0; index < table->size; index++) {
- u32 value = get_entry_control(table, index);
-
- *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
- *cs++ = value;
- }
-
- /* All remaining entries are also unused */
- for (; index < table->n_entries; index++) {
- *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
- *cs++ = unused_value;
- }
-
- *cs++ = MI_NOOP;
- intel_ring_advance(rq, cs);
-
- return 0;
-}
-
/*
* Get l3cc_value from MOCS entry taking into account when it's not used:
* I915_MOCS_PTE's value is returned in this case.
@@ -494,141 +400,93 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table,
u16 low,
u16 high)
{
- return low | high << 16;
+ return low | (u32)high << 16;
}
-static int emit_mocs_l3cc_table(struct i915_request *rq,
- const struct drm_i915_mocs_table *table)
+static void init_l3cc_table(struct intel_engine_cs *engine,
+ const struct drm_i915_mocs_table *table)
{
- u16 unused_value;
+ struct intel_uncore *uncore = engine->uncore;
+ u16 unused_value = table->table[I915_MOCS_PTE].l3cc_value;
unsigned int i;
- u32 *cs;
-
- if (GEM_WARN_ON(table->size > table->n_entries))
- return -ENODEV;
-
- /* Set unused values to PTE */
- unused_value = table->table[I915_MOCS_PTE].l3cc_value;
-
- cs = intel_ring_begin(rq, 2 + table->n_entries);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries / 2);
for (i = 0; i < table->size / 2; i++) {
u16 low = get_entry_l3cc(table, 2 * i);
u16 high = get_entry_l3cc(table, 2 * i + 1);
- *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
- *cs++ = l3cc_combine(table, low, high);
+ intel_uncore_write(uncore,
+ GEN9_LNCFCMOCS(i),
+ l3cc_combine(table, low, high));
}
/* Odd table size - 1 left over */
- if (table->size & 0x01) {
+ if (table->size & 1) {
u16 low = get_entry_l3cc(table, 2 * i);
- *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
- *cs++ = l3cc_combine(table, low, unused_value);
+ intel_uncore_write(uncore,
+ GEN9_LNCFCMOCS(i),
+ l3cc_combine(table, low, unused_value));
i++;
}
/* All remaining entries are also unused */
- for (; i < table->n_entries / 2; i++) {
- *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
- *cs++ = l3cc_combine(table, unused_value, unused_value);
- }
-
- *cs++ = MI_NOOP;
- intel_ring_advance(rq, cs);
-
- return 0;
+ for (; i < table->n_entries / 2; i++)
+ intel_uncore_write(uncore,
+ GEN9_LNCFCMOCS(i),
+ l3cc_combine(table, unused_value,
+ unused_value));
}
-static void intel_mocs_init_l3cc_table(struct intel_gt *gt)
+void intel_mocs_init_engine(struct intel_engine_cs *engine)
{
- struct intel_uncore *uncore = gt->uncore;
struct drm_i915_mocs_table table;
- unsigned int i;
- u16 unused_value;
- if (!get_mocs_settings(gt, &table))
+ /* Called under a blanket forcewake */
+ assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
+
+ if (!get_mocs_settings(engine->i915, &table))
return;
- /* Set unused values to PTE */
- unused_value = table.table[I915_MOCS_PTE].l3cc_value;
+ /* Platforms with global MOCS do not need per-engine initialization. */
+ if (!HAS_GLOBAL_MOCS_REGISTERS(engine->i915))
+ init_mocs_table(engine, &table);
- for (i = 0; i < table.size / 2; i++) {
- u16 low = get_entry_l3cc(&table, 2 * i);
- u16 high = get_entry_l3cc(&table, 2 * i + 1);
+ if (engine->class == RENDER_CLASS)
+ init_l3cc_table(engine, &table);
+}
- intel_uncore_write(uncore,
- GEN9_LNCFCMOCS(i),
- l3cc_combine(&table, low, high));
- }
+static void intel_mocs_init_global(struct intel_gt *gt)
+{
+ struct intel_uncore *uncore = gt->uncore;
+ struct drm_i915_mocs_table table;
+ unsigned int index;
- /* Odd table size - 1 left over */
- if (table.size & 0x01) {
- u16 low = get_entry_l3cc(&table, 2 * i);
+ GEM_BUG_ON(!HAS_GLOBAL_MOCS_REGISTERS(gt->i915));
- intel_uncore_write(uncore,
- GEN9_LNCFCMOCS(i),
- l3cc_combine(&table, low, unused_value));
- i++;
- }
+ if (!get_mocs_settings(gt->i915, &table))
+ return;
- /* All remaining entries are also unused */
- for (; i < table.n_entries / 2; i++)
- intel_uncore_write(uncore,
- GEN9_LNCFCMOCS(i),
- l3cc_combine(&table, unused_value,
- unused_value));
-}
+ if (GEM_DEBUG_WARN_ON(table.size > table.n_entries))
+ return;
-/**
- * intel_mocs_emit() - program the MOCS register.
- * @rq: Request to use to set up the MOCS tables.
- *
- * This function will emit a batch buffer with the values required for
- * programming the MOCS register values for all the currently supported
- * rings.
- *
- * These registers are partially stored in the RCS context, so they are
- * emitted at the same time so that when a context is created these registers
- * are set up. These registers have to be emitted into the start of the
- * context as setting the ELSP will re-init some of these registers back
- * to the hw values.
- *
- * Return: 0 on success, otherwise the error status.
- */
-int intel_mocs_emit(struct i915_request *rq)
-{
- struct drm_i915_mocs_table t;
- int ret;
-
- if (HAS_GLOBAL_MOCS_REGISTERS(rq->i915) ||
- rq->engine->class != RENDER_CLASS)
- return 0;
-
- if (get_mocs_settings(rq->engine->gt, &t)) {
- /* Program the RCS control registers */
- ret = emit_mocs_control_table(rq, &t);
- if (ret)
- return ret;
-
- /* Now program the l3cc registers */
- ret = emit_mocs_l3cc_table(rq, &t);
- if (ret)
- return ret;
- }
+ for (index = 0; index < table.size; index++)
+ intel_uncore_write(uncore,
+ GEN12_GLOBAL_MOCS(index),
+ table.table[index].control_value);
- return 0;
+ /*
+ * Ok, now set the unused entries to the invalid entry (index 0). These
+ * entries are officially undefined, and no contract is given for their
+ * contents or settings.
+ */
+ for (; index < table.n_entries; index++)
+ intel_uncore_write(uncore,
+ GEN12_GLOBAL_MOCS(index),
+ table.table[0].control_value);
}
void intel_mocs_init(struct intel_gt *gt)
{
- intel_mocs_init_l3cc_table(gt);
-
if (HAS_GLOBAL_MOCS_REGISTERS(gt->i915))
intel_mocs_init_global(gt);
}
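With the MOCS tables now written directly over MMIO, each GEN9_LNCFCMOCS register still packs two adjacent l3cc entries via l3cc_combine(). A packing example with assumed entry values:

/*
 *   get_entry_l3cc(table, 2 * i)     = 0x0030   (low)
 *   get_entry_l3cc(table, 2 * i + 1) = 0x0010   (high)
 *
 *   low | (u32)high << 16 = 0x0030 | 0x00100000 = 0x00100030
 *
 * An odd-sized table pads the final slot with the I915_MOCS_PTE entry's
 * l3cc value, and all remaining registers are filled with that value too.
 */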
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.h b/drivers/gpu/drm/i915/gt/intel_mocs.h
index 2ae816b7ca19..83371f3e6ba1 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.h
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.h
@@ -49,13 +49,10 @@
* context handling keep the MOCS in step.
*/
-struct i915_request;
struct intel_engine_cs;
struct intel_gt;
void intel_mocs_init(struct intel_gt *gt);
void intel_mocs_init_engine(struct intel_engine_cs *engine);
-int intel_mocs_emit(struct i915_request *rq);
-
#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
index 71184aa72896..70f0e01a38b9 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6.c
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
@@ -65,7 +65,7 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6)
set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
- for_each_engine(engine, rc6_to_gt(rc6)->i915, id)
+ for_each_engine(engine, rc6_to_gt(rc6), id)
set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
set(uncore, GUC_MAX_IDLE_COUNT, 0xA);
@@ -133,7 +133,7 @@ static void gen9_rc6_enable(struct intel_rc6 *rc6)
set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
- for_each_engine(engine, rc6_to_gt(rc6)->i915, id)
+ for_each_engine(engine, rc6_to_gt(rc6), id)
set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
set(uncore, GUC_MAX_IDLE_COUNT, 0xA);
@@ -192,7 +192,7 @@ static void gen8_rc6_enable(struct intel_rc6 *rc6)
set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
- for_each_engine(engine, rc6_to_gt(rc6)->i915, id)
+ for_each_engine(engine, rc6_to_gt(rc6), id)
set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
set(uncore, GEN6_RC_SLEEP, 0);
set(uncore, GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
@@ -219,7 +219,7 @@ static void gen6_rc6_enable(struct intel_rc6 *rc6)
set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000);
set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25);
- for_each_engine(engine, i915, id)
+ for_each_engine(engine, rc6_to_gt(rc6), id)
set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
set(uncore, GEN6_RC_SLEEP, 0);
@@ -344,7 +344,7 @@ static void chv_rc6_enable(struct intel_rc6 *rc6)
set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
- for_each_engine(engine, rc6_to_gt(rc6)->i915, id)
+ for_each_engine(engine, rc6_to_gt(rc6), id)
set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
set(uncore, GEN6_RC_SLEEP, 0);
@@ -371,7 +371,7 @@ static void vlv_rc6_enable(struct intel_rc6 *rc6)
set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000);
set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25);
- for_each_engine(engine, rc6_to_gt(rc6)->i915, id)
+ for_each_engine(engine, rc6_to_gt(rc6), id)
set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
set(uncore, GEN6_RC6_THRESHOLD, 0x557);
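The change repeated throughout this file (and the rest of the series) is that for_each_engine() now takes the struct intel_gt rather than the struct drm_i915_private. A minimal before/after sketch of a caller, assuming a struct intel_gt *gt plus the usual engine/id locals; the register write uses intel_uncore_write() as seen elsewhere in this patch:

struct intel_engine_cs *engine;
enum intel_engine_id id;

/* Old form: iterate engines via the device pointer. */
for_each_engine(engine, gt->i915, id)
	intel_uncore_write(gt->uncore, RING_MAX_IDLE(engine->mmio_base), 10);

/* New form, as used by this patch: iterate via the GT itself. */
for_each_engine(engine, gt, id)
	intel_uncore_write(gt->uncore, RING_MAX_IDLE(engine->mmio_base), 10);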
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 7b3d9d4517a0..bf8d1ed4b1d8 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -282,14 +282,14 @@ static int gen6_reset_engines(struct intel_gt *gt,
intel_engine_mask_t engine_mask,
unsigned int retry)
{
- struct intel_engine_cs *engine;
- const u32 hw_engine_mask[] = {
+ static const u32 hw_engine_mask[] = {
[RCS0] = GEN6_GRDOM_RENDER,
[BCS0] = GEN6_GRDOM_BLT,
[VCS0] = GEN6_GRDOM_MEDIA,
[VCS1] = GEN8_GRDOM_MEDIA2,
[VECS0] = GEN6_GRDOM_VECS,
};
+ struct intel_engine_cs *engine;
u32 hw_mask;
if (engine_mask == ALL_ENGINES) {
@@ -298,7 +298,7 @@ static int gen6_reset_engines(struct intel_gt *gt,
intel_engine_mask_t tmp;
hw_mask = 0;
- for_each_engine_masked(engine, gt->i915, engine_mask, tmp) {
+ for_each_engine_masked(engine, gt, engine_mask, tmp) {
GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask));
hw_mask |= hw_engine_mask[engine->id];
}
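Two things change in gen6_reset_engines() (and in gen11_reset_engines() below): the engine-to-reset-domain map becomes static const, so a single read-only copy lives in .rodata instead of being rebuilt on the stack on every reset attempt, and the per-engine walk uses the gt-based for_each_engine_masked(). A generic sketch of the static const lookup-table idiom; the function name and table values are made up for illustration:

static u32 reset_domain_sketch(unsigned int id)
{
	/* static const: initialised once at build time, never copied per call */
	static const u32 domain[] = {
		[0] = 0x1,
		[1] = 0x2,
		[2] = 0x4,
	};

	return id < ARRAY_SIZE(domain) ? domain[id] : 0;
}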
@@ -413,7 +413,7 @@ static int gen11_reset_engines(struct intel_gt *gt,
intel_engine_mask_t engine_mask,
unsigned int retry)
{
- const u32 hw_engine_mask[] = {
+ static const u32 hw_engine_mask[] = {
[RCS0] = GEN11_GRDOM_RENDER,
[BCS0] = GEN11_GRDOM_BLT,
[VCS0] = GEN11_GRDOM_MEDIA,
@@ -432,7 +432,7 @@ static int gen11_reset_engines(struct intel_gt *gt,
hw_mask = GEN11_GRDOM_FULL;
} else {
hw_mask = 0;
- for_each_engine_masked(engine, gt->i915, engine_mask, tmp) {
+ for_each_engine_masked(engine, gt, engine_mask, tmp) {
GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask));
hw_mask |= hw_engine_mask[engine->id];
ret = gen11_lock_sfc(engine, &hw_mask);
@@ -451,7 +451,7 @@ sfc_unlock:
* expiration).
*/
if (engine_mask != ALL_ENGINES)
- for_each_engine_masked(engine, gt->i915, engine_mask, tmp)
+ for_each_engine_masked(engine, gt, engine_mask, tmp)
gen11_unlock_sfc(engine);
return ret;
@@ -510,7 +510,7 @@ static int gen8_reset_engines(struct intel_gt *gt,
intel_engine_mask_t tmp;
int ret;
- for_each_engine_masked(engine, gt->i915, engine_mask, tmp) {
+ for_each_engine_masked(engine, gt, engine_mask, tmp) {
ret = gen8_engine_reset_prepare(engine);
if (ret && !reset_non_ready)
goto skip_reset;
@@ -536,7 +536,7 @@ static int gen8_reset_engines(struct intel_gt *gt,
ret = gen6_reset_engines(gt, engine_mask, retry);
skip_reset:
- for_each_engine_masked(engine, gt->i915, engine_mask, tmp)
+ for_each_engine_masked(engine, gt, engine_mask, tmp)
gen8_engine_reset_cancel(engine);
return ret;
@@ -682,7 +682,7 @@ static intel_engine_mask_t reset_prepare(struct intel_gt *gt)
intel_engine_mask_t awake = 0;
enum intel_engine_id id;
- for_each_engine(engine, gt->i915, id) {
+ for_each_engine(engine, gt, id) {
if (intel_engine_pm_get_if_awake(engine))
awake |= engine->mask;
reset_prepare_engine(engine);
@@ -712,10 +712,10 @@ static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask)
if (err)
return err;
- for_each_engine(engine, gt->i915, id)
+ for_each_engine(engine, gt, id)
__intel_engine_reset(engine, stalled_mask & engine->mask);
- i915_gem_restore_fences(gt->i915);
+ i915_gem_restore_fences(gt->ggtt);
return err;
}
@@ -733,7 +733,7 @@ static void reset_finish(struct intel_gt *gt, intel_engine_mask_t awake)
struct intel_engine_cs *engine;
enum intel_engine_id id;
- for_each_engine(engine, gt->i915, id) {
+ for_each_engine(engine, gt, id) {
reset_finish_engine(engine);
if (awake & engine->mask)
intel_engine_pm_put(engine);
@@ -769,7 +769,7 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
if (GEM_SHOW_DEBUG() && !intel_engines_are_idle(gt)) {
struct drm_printer p = drm_debug_printer(__func__);
- for_each_engine(engine, gt->i915, id)
+ for_each_engine(engine, gt, id)
intel_engine_dump(engine, &p, "%s\n", engine->name);
}
@@ -786,7 +786,7 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
__intel_gt_reset(gt, ALL_ENGINES);
- for_each_engine(engine, gt->i915, id)
+ for_each_engine(engine, gt, id)
engine->submit_request = nop_submit_request;
/*
@@ -798,7 +798,7 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
set_bit(I915_WEDGED, &gt->reset.flags);
/* Mark all executing requests as skipped */
- for_each_engine(engine, gt->i915, id)
+ for_each_engine(engine, gt, id)
engine->cancel_requests(engine);
reset_finish(gt, awake);
@@ -811,7 +811,7 @@ void intel_gt_set_wedged(struct intel_gt *gt)
intel_wakeref_t wakeref;
mutex_lock(&gt->reset.mutex);
- with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
+ with_intel_runtime_pm(gt->uncore->rpm, wakeref)
__intel_gt_set_wedged(gt);
mutex_unlock(&gt->reset.mutex);
}
@@ -872,8 +872,14 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
ok = !HAS_EXECLISTS(gt->i915); /* XXX better agnosticism desired */
if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
ok = __intel_gt_reset(gt, ALL_ENGINES) == 0;
- if (!ok)
+ if (!ok) {
+ /*
+ * Warn CI about the unrecoverable wedged condition.
+ * Time for a reboot.
+ */
+ add_taint_for_CI(TAINT_WARN);
return false;
+ }
/*
* Undo nop_submit_request. We prevent all new i915 requests from
@@ -928,7 +934,7 @@ static int resume(struct intel_gt *gt)
enum intel_engine_id id;
int ret;
- for_each_engine(engine, gt->i915, id) {
+ for_each_engine(engine, gt, id) {
ret = engine->resume(engine);
if (ret)
return ret;
@@ -1186,7 +1192,7 @@ void intel_gt_handle_error(struct intel_gt *gt,
* isn't the case at least when we get here by doing a
* simulated reset via debugfs, so get an RPM reference.
*/
- wakeref = intel_runtime_pm_get(&gt->i915->runtime_pm);
+ wakeref = intel_runtime_pm_get(gt->uncore->rpm);
engine_mask &= INTEL_INFO(gt->i915)->engine_mask;
@@ -1200,7 +1206,7 @@ void intel_gt_handle_error(struct intel_gt *gt,
* single reset fails.
*/
if (intel_has_reset_engine(gt) && !intel_gt_is_wedged(gt)) {
- for_each_engine_masked(engine, gt->i915, engine_mask, tmp) {
+ for_each_engine_masked(engine, gt, engine_mask, tmp) {
BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE);
if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
&gt->reset.flags))
@@ -1228,7 +1234,7 @@ void intel_gt_handle_error(struct intel_gt *gt,
synchronize_rcu_expedited();
/* Prevent any other reset-engine attempt. */
- for_each_engine(engine, gt->i915, tmp) {
+ for_each_engine(engine, gt, tmp) {
while (test_and_set_bit(I915_RESET_ENGINE + engine->id,
&gt->reset.flags))
wait_on_bit(&gt->reset.flags,
@@ -1238,7 +1244,7 @@ void intel_gt_handle_error(struct intel_gt *gt,
intel_gt_reset_global(gt, engine_mask, msg);
- for_each_engine(engine, gt->i915, tmp)
+ for_each_engine(engine, gt, tmp)
clear_bit_unlock(I915_RESET_ENGINE + engine->id,
&gt->reset.flags);
clear_bit_unlock(I915_RESET_BACKOFF, &gt->reset.flags);
@@ -1246,7 +1252,7 @@ void intel_gt_handle_error(struct intel_gt *gt,
wake_up_all(&gt->reset.queue);
out:
- intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
+ intel_runtime_pm_put(gt->uncore->rpm, wakeref);
}
int intel_gt_reset_trylock(struct intel_gt *gt, int *srcu)
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index 311fdc0a21bc..bf631f15aa78 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -1609,7 +1609,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
struct intel_engine_cs *signaller;
*cs++ = MI_LOAD_REGISTER_IMM(num_engines);
- for_each_engine(signaller, i915, id) {
+ for_each_engine(signaller, engine->gt, id) {
if (signaller == engine)
continue;
@@ -1663,7 +1663,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
i915_reg_t last_reg = {}; /* keep gcc quiet */
*cs++ = MI_LOAD_REGISTER_IMM(num_engines);
- for_each_engine(signaller, i915, id) {
+ for_each_engine(signaller, engine->gt, id) {
if (signaller == engine)
continue;
@@ -1676,7 +1676,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
/* Insert a delay before the next switch! */
*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
*cs++ = i915_mmio_reg_offset(last_reg);
- *cs++ = intel_gt_scratch_offset(rq->engine->gt,
+ *cs++ = intel_gt_scratch_offset(engine->gt,
INTEL_GT_SCRATCH_FIELD_DEFAULT);
*cs++ = MI_NOOP;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index ba65e5018978..af8a8183154a 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -567,7 +567,7 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine,
struct i915_wa_list *wal)
{
- /* Wa_1409142259 */
+ /* Wa_1409142259:tgl */
WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
}
@@ -892,11 +892,27 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
wa_write_or(wal,
GAMT_CHKN_BIT_REG,
GAMT_CHKN_DISABLE_L3_COH_PIPE);
+
+ /* Wa_1607087056:icl */
+ wa_write_or(wal,
+ SLICE_UNIT_LEVEL_CLKGATE,
+ L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
}
static void
tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
+ /* Wa_1409420604:tgl */
+ if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0))
+ wa_write_or(wal,
+ SUBSLICE_UNIT_LEVEL_CLKGATE2,
+ CPSSUNIT_CLKGATE_DIS);
+
+ /* Wa_1409180338:tgl */
+ if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0))
+ wa_write_or(wal,
+ SLICE_UNIT_LEVEL_CLKGATE,
+ L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
}
static void
@@ -1260,6 +1276,26 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
struct drm_i915_private *i915 = engine->i915;
+ if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) {
+ /* Wa_1606700617:tgl */
+ wa_masked_en(wal,
+ GEN9_CS_DEBUG_MODE1,
+ FF_DOP_CLOCK_GATE_DISABLE);
+
+ /* Wa_1607138336:tgl */
+ wa_write_or(wal,
+ GEN9_CTX_PREEMPT_REG,
+ GEN12_DISABLE_POSH_BUSY_FF_DOP_CG);
+
+ /* Wa_1607030317:tgl */
+ /* Wa_1607186500:tgl */
+ /* Wa_1607297627:tgl */
+ wa_masked_en(wal,
+ GEN6_RC_SLEEP_PSMI_CONTROL,
+ GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
+ GEN8_RC_SEMA_IDLE_MSG_DISABLE);
+ }
+
if (IS_GEN(i915, 11)) {
/* This is not a Wa. Enable for better image quality */
wa_masked_en(wal,
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index 5d43cbc3f345..123db2c3f956 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -240,6 +240,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
struct mock_engine *engine;
GEM_BUG_ON(id >= I915_NUM_ENGINES);
+ GEM_BUG_ON(!i915->gt.uncore);
engine = kzalloc(sizeof(*engine) + PAGE_SIZE, GFP_KERNEL);
if (!engine)
@@ -248,9 +249,11 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
/* minimal engine setup for requests */
engine->base.i915 = i915;
engine->base.gt = &i915->gt;
+ engine->base.uncore = i915->gt.uncore;
snprintf(engine->base.name, sizeof(engine->base.name), "%s", name);
engine->base.id = id;
engine->base.mask = BIT(id);
+ engine->base.legacy_idx = INVALID_ENGINE;
engine->base.instance = id;
engine->base.status_page.addr = (void *)(engine + 1);
@@ -265,6 +268,9 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
engine->base.reset.finish = mock_reset_finish;
engine->base.cancel_requests = mock_cancel_requests;
+ i915->gt.engine[id] = &engine->base;
+ i915->gt.engine_class[0][id] = &engine->base;
+
/* fake hw queue */
spin_lock_init(&engine->hw_lock);
timer_setup(&engine->hw_delay, hw_delay_complete, 0);
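Registering the mock engine in gt.engine[] and gt.engine_class[][] matters because the gt-based iterators used throughout this series find engines via the GT, so the mock engines must be registered there too; otherwise the mock selftests would see an empty GT. A small usage sketch, assuming a struct drm_i915_private *i915 whose mock engines have already been created:

struct intel_engine_cs *engine;
enum intel_engine_id id;

/* The mock engines registered above are now visible to the iterator. */
for_each_engine(engine, &i915->gt, id)
	pr_info("mock engine %s present\n", engine->name);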
diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c
index 7c838a57e174..f63a26a3e620 100644
--- a/drivers/gpu/drm/i915/gt/selftest_context.c
+++ b/drivers/gpu/drm/i915/gt/selftest_context.c
@@ -159,7 +159,7 @@ static int live_context_size(void *arg)
if (IS_ERR(fixme))
return PTR_ERR(fixme);
- for_each_engine(engine, gt->i915, id) {
+ for_each_engine(engine, gt, id) {
struct {
struct drm_i915_gem_object *state;
void *pinned;
@@ -305,7 +305,7 @@ static int live_active_context(void *arg)
goto out_file;
}
- for_each_engine(engine, gt->i915, id) {
+ for_each_engine(engine, gt, id) {
err = __live_active_context(engine, fixme);
if (err)
break;
@@ -415,7 +415,7 @@ static int live_remote_context(void *arg)
goto out_file;
}
- for_each_engine(engine, gt->i915, id) {
+ for_each_engine(engine, gt, id) {
err = __live_remote_context(engine, fixme);
if (err)
break;
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
index 3a1419376912..20b9c83f43ad 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
@@ -25,7 +25,7 @@ static int live_engine_pm(void *arg)
}
GEM_BUG_ON(intel_gt_pm_is_awake(gt));
- for_each_engine(engine, gt->i915, id) {
+ for_each_engine(engine, gt, id) {
const typeof(*igt_atomic_phases) *p;
for (p = igt_atomic_phases; p->name; p++) {
diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
index 87985bd46423..5d429037cdad 100644
--- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
@@ -5,6 +5,8 @@
* Copyright © 2019 Intel Corporation
*/
+#include "selftest_llc.h"
+
static int live_gt_resume(void *arg)
{
struct intel_gt *gt = arg;
@@ -32,6 +34,13 @@ static int live_gt_resume(void *arg)
err = -EINVAL;
break;
}
+
+ err = st_llc_verify(&gt->llc);
+ if (err) {
+ pr_err("llc state not restored upon resume!\n");
+ intel_gt_set_wedged_on_init(gt);
+ break;
+ }
} while (!__igt_timeout(end_time, NULL));
return err;
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index e8a40df79bd0..8e0016464325 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -323,7 +323,7 @@ static int igt_hang_sanitycheck(void *arg)
if (err)
return err;
- for_each_engine(engine, gt->i915, id) {
+ for_each_engine(engine, gt, id) {
struct intel_wedge_me w;
long timeout;
@@ -400,7 +400,7 @@ static int igt_reset_nop(void *arg)
reset_count = i915_reset_count(global);
count = 0;
do {
- for_each_engine(engine, gt->i915, id) {
+ for_each_engine(engine, gt, id) {
int i;
for (i = 0; i < 16; i++) {
@@ -471,7 +471,7 @@ static int igt_reset_nop_engine(void *arg)
}
i915_gem_context_clear_bannable(ctx);
- for_each_engine(engine, gt->i915, id) {
+ for_each_engine(engine, gt, id) {
unsigned int reset_count, reset_engine_count;
unsigned int count;
IGT_TIMEOUT(end_time);
@@ -560,7 +560,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
return err;
}
- for_each_engine(engine, gt->i915, id) {
+ for_each_engine(engine, gt, id) {
unsigned int reset_count, reset_engine_count;
IGT_TIMEOUT(end_time);
@@ -782,7 +782,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
h.ctx->sched.priority = 1024;
}
- for_each_engine(engine, gt->i915, id) {
+ for_each_engine(engine, gt, id) {
struct active_engine threads[I915_NUM_ENGINES] = {};
unsigned long device = i915_reset_count(global);
unsigned long count = 0, reported;
@@ -800,7 +800,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
}
memset(threads, 0, sizeof(threads));
- for_each_engine(other, gt->i915, tmp) {
+ for_each_engine(other, gt, tmp) {
struct task_struct *tsk;
threads[tmp].resets =
@@ -914,7 +914,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
}
unwind:
- for_each_engine(other, gt->i915, tmp) {
+ for_each_engine(other, gt, tmp) {
int ret;
if (!threads[tmp].task)
@@ -1335,7 +1335,7 @@ static int wait_for_others(struct intel_gt *gt,
struct intel_engine_cs *engine;
enum intel_engine_id id;
- for_each_engine(engine, gt->i915, id) {
+ for_each_engine(engine, gt, id) {
if (engine == exclude)
continue;
@@ -1363,7 +1363,7 @@ static int igt_reset_queue(void *arg)
if (err)
goto unlock;
- for_each_engine(engine, gt->i915, id) {
+ for_each_engine(engine, gt, id) {
struct i915_request *prev;
IGT_TIMEOUT(end_time);
unsigned int count;
@@ -1651,7 +1651,7 @@ static int igt_reset_engines_atomic(void *arg)
struct intel_engine_cs *engine;
enum intel_engine_id id;
- for_each_engine(engine, gt->i915, id) {
+ for_each_engine(engine, gt, id) {
err = igt_atomic_reset_engine(engine, p);
if (err)
goto out;
@@ -1695,14 +1695,14 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
if (intel_gt_is_wedged(gt))
return -EIO; /* we're long past hope of a successful reset */
- wakeref = intel_runtime_pm_get(&gt->i915->runtime_pm);
+ wakeref = intel_runtime_pm_get(gt->uncore->rpm);
saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck);
drain_delayed_work(&gt->hangcheck.work); /* flush param */
err = intel_gt_live_subtests(tests, gt);
i915_modparams.enable_hangcheck = saved_hangcheck;
- intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
+ intel_runtime_pm_put(gt->uncore->rpm, wakeref);
return err;
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_llc.c b/drivers/gpu/drm/i915/gt/selftest_llc.c
new file mode 100644
index 000000000000..a7057785e420
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_llc.c
@@ -0,0 +1,77 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "intel_pm.h" /* intel_gpu_freq() */
+#include "selftest_llc.h"
+
+static int gen6_verify_ring_freq(struct intel_llc *llc)
+{
+ struct drm_i915_private *i915 = llc_to_gt(llc)->i915;
+ struct ia_constants consts;
+ intel_wakeref_t wakeref;
+ unsigned int gpu_freq;
+ int err = 0;
+
+ wakeref = intel_runtime_pm_get(llc_to_gt(llc)->uncore->rpm);
+
+ if (!get_ia_constants(llc, &consts)) {
+ err = -ENODEV;
+ goto out_rpm;
+ }
+
+ for (gpu_freq = consts.min_gpu_freq;
+ gpu_freq <= consts.max_gpu_freq;
+ gpu_freq++) {
+ unsigned int ia_freq, ring_freq, found;
+ u32 val;
+
+ calc_ia_freq(llc, gpu_freq, &consts, &ia_freq, &ring_freq);
+
+ val = gpu_freq;
+ if (sandybridge_pcode_read(i915,
+ GEN6_PCODE_READ_MIN_FREQ_TABLE,
+ &val, NULL)) {
+ pr_err("Failed to read freq table[%d], range [%d, %d]\n",
+ gpu_freq, consts.min_gpu_freq, consts.max_gpu_freq);
+ err = -ENXIO;
+ break;
+ }
+
+ found = (val >> 0) & 0xff;
+ if (found != ia_freq) {
+ pr_err("Min freq table(%d/[%d, %d]):%dMHz did not match expected CPU freq, found %d, expected %d\n",
+ gpu_freq, consts.min_gpu_freq, consts.max_gpu_freq,
+ intel_gpu_freq(i915, gpu_freq * (INTEL_GEN(i915) >= 9 ? GEN9_FREQ_SCALER : 1)),
+ found, ia_freq);
+ err = -EINVAL;
+ break;
+ }
+
+ found = (val >> 8) & 0xff;
+ if (found != ring_freq) {
+ pr_err("Min freq table(%d/[%d, %d]):%dMHz did not match expected ring freq, found %d, expected %d\n",
+ gpu_freq, consts.min_gpu_freq, consts.max_gpu_freq,
+ intel_gpu_freq(i915, gpu_freq * (INTEL_GEN(i915) >= 9 ? GEN9_FREQ_SCALER : 1)),
+ found, ring_freq);
+ err = -EINVAL;
+ break;
+ }
+ }
+
+out_rpm:
+ intel_runtime_pm_put(llc_to_gt(llc)->uncore->rpm, wakeref);
+ return err;
+}
+
+int st_llc_verify(struct intel_llc *llc)
+{
+ int err = 0;
+
+ if (HAS_LLC(llc_to_gt(llc)->i915))
+ err = gen6_verify_ring_freq(llc);
+
+ return err;
+}
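gen6_verify_ring_freq() walks the whole GPU frequency range, asks the PCU for each min-frequency table entry via GEN6_PCODE_READ_MIN_FREQ_TABLE, and compares the reply against what calc_ia_freq() predicts. A hypothetical decode helper restating the two bitfields checked above:

/* Split a GEN6_PCODE_READ_MIN_FREQ_TABLE reply into its two fields,
 * mirroring the masks used in gen6_verify_ring_freq(). */
static void decode_min_freq_entry(u32 val,
				  unsigned int *ia_freq,
				  unsigned int *ring_freq)
{
	*ia_freq = (val >> 0) & 0xff;	/* bits 7:0 - expected CPU (IA) frequency */
	*ring_freq = (val >> 8) & 0xff;	/* bits 15:8 - expected ring frequency */
}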
diff --git a/drivers/gpu/drm/i915/gt/selftest_llc.h b/drivers/gpu/drm/i915/gt/selftest_llc.h
new file mode 100644
index 000000000000..873f896e72f2
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_llc.h
@@ -0,0 +1,14 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef SELFTEST_LLC_H
+#define SELFTEST_LLC_H
+
+struct intel_llc;
+
+int st_llc_verify(struct intel_llc *llc);
+
+#endif /* SELFTEST_LLC_H */
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 393ae5321e1d..5dc679781a08 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -19,22 +19,52 @@
#include "gem/selftests/igt_gem_utils.h"
#include "gem/selftests/mock_context.h"
+#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
+#define NUM_GPR_DW (16 * 2) /* each GPR is 2 dwords */
+
+static struct i915_vma *create_scratch(struct intel_gt *gt)
+{
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ int err;
+
+ obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
+
+ vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
+ if (IS_ERR(vma)) {
+ i915_gem_object_put(obj);
+ return vma;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
+ if (err) {
+ i915_gem_object_put(obj);
+ return ERR_PTR(err);
+ }
+
+ return vma;
+}
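CS_GPR() resolves general purpose register dword n of an engine to an absolute MMIO offset (mmio_base + 0x600 + 4 * n), and create_scratch() pins a cacheable page in the GGTT for the selftests to target with MI_STORE_REGISTER_MEM. A worked example of the macro, assuming a purely illustrative mmio_base of 0x2000:

u32 gpr_first = CS_GPR(engine, 0);		/* 0x2000 + 0x600 + 0 * 4 == 0x2600 */
u32 gpr_third = CS_GPR(engine, 3);		/* 0x2000 + 0x600 + 3 * 4 == 0x260c */
u32 gpr_last = CS_GPR(engine, NUM_GPR_DW - 1);	/* dword 31, 0x2600 + 31 * 4 == 0x267c */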
+
static int live_sanitycheck(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct i915_gem_engines_iter it;
struct i915_gem_context *ctx;
struct intel_context *ce;
struct igt_spinner spin;
int err = -ENOMEM;
- if (!HAS_LOGICAL_RING_CONTEXTS(i915))
+ if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
return 0;
- if (igt_spinner_init(&spin, &i915->gt))
+ if (igt_spinner_init(&spin, gt))
return -ENOMEM;
- ctx = kernel_context(i915);
+ ctx = kernel_context(gt->i915);
if (!ctx)
goto err_spin;
@@ -51,13 +81,13 @@ static int live_sanitycheck(void *arg)
if (!igt_wait_for_spinner(&spin, rq)) {
GEM_TRACE("spinner failed to start\n");
GEM_TRACE_DUMP();
- intel_gt_set_wedged(&i915->gt);
+ intel_gt_set_wedged(gt);
err = -EIO;
goto err_ctx;
}
igt_spinner_end(&spin);
- if (igt_flush_test(i915)) {
+ if (igt_flush_test(gt->i915)) {
err = -EIO;
goto err_ctx;
}
@@ -72,12 +102,11 @@ err_spin:
return err;
}
-static int live_unlite_restore(struct drm_i915_private *i915, int prio)
+static int live_unlite_restore(struct intel_gt *gt, int prio)
{
struct intel_engine_cs *engine;
struct i915_gem_context *ctx;
enum intel_engine_id id;
- intel_wakeref_t wakeref;
struct igt_spinner spin;
int err = -ENOMEM;
@@ -86,18 +115,15 @@ static int live_unlite_restore(struct drm_i915_private *i915, int prio)
* on the same engine from the same parent context.
*/
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
- if (igt_spinner_init(&spin, &i915->gt))
- goto err_unlock;
+ if (igt_spinner_init(&spin, gt))
+ return err;
- ctx = kernel_context(i915);
+ ctx = kernel_context(gt->i915);
if (!ctx)
goto err_spin;
err = 0;
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
struct intel_context *ce[2] = {};
struct i915_request *rq[2];
struct igt_live_test t;
@@ -109,7 +135,7 @@ static int live_unlite_restore(struct drm_i915_private *i915, int prio)
if (!intel_engine_can_store_dword(engine))
continue;
- if (igt_live_test_begin(&t, i915, __func__, engine->name)) {
+ if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
err = -EIO;
break;
}
@@ -143,11 +169,11 @@ static int live_unlite_restore(struct drm_i915_private *i915, int prio)
GEM_BUG_ON(!ce[1]->ring->size);
intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
- local_bh_disable(); /* appease lockdep */
+ local_irq_disable(); /* appease lockdep */
__context_pin_acquire(ce[1]);
__execlists_update_reg_state(ce[1], engine);
__context_pin_release(ce[1]);
- local_bh_enable();
+ local_irq_enable();
rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
if (IS_ERR(rq[0])) {
@@ -234,9 +260,6 @@ err_ce:
kernel_context_close(ctx);
err_spin:
igt_spinner_fini(&spin);
-err_unlock:
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
return err;
}
@@ -302,7 +325,13 @@ semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
if (IS_ERR(rq))
goto out_ctx;
- err = emit_semaphore_chain(rq, vma, idx);
+ err = 0;
+ if (rq->engine->emit_init_breadcrumb)
+ err = rq->engine->emit_init_breadcrumb(rq);
+ if (err == 0)
+ err = emit_semaphore_chain(rq, vma, idx);
+ if (err == 0)
+ i915_request_get(rq);
i915_request_add(rq);
if (err)
rq = ERR_PTR(err);
@@ -315,10 +344,10 @@ out_ctx:
static int
release_queue(struct intel_engine_cs *engine,
struct i915_vma *vma,
- int idx)
+ int idx, int prio)
{
struct i915_sched_attr attr = {
- .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
+ .priority = prio,
};
struct i915_request *rq;
u32 *cs;
@@ -339,9 +368,15 @@ release_queue(struct intel_engine_cs *engine,
*cs++ = 1;
intel_ring_advance(rq, cs);
+
+ i915_request_get(rq);
i915_request_add(rq);
+ local_bh_disable();
engine->schedule(rq, &attr);
+ local_bh_enable(); /* kick tasklet */
+
+ i915_request_put(rq);
return 0;
}
@@ -360,8 +395,7 @@ slice_semaphore_queue(struct intel_engine_cs *outer,
if (IS_ERR(head))
return PTR_ERR(head);
- i915_request_get(head);
- for_each_engine(engine, outer->i915, id) {
+ for_each_engine(engine, outer->gt, id) {
for (i = 0; i < count; i++) {
struct i915_request *rq;
@@ -370,10 +404,12 @@ slice_semaphore_queue(struct intel_engine_cs *outer,
err = PTR_ERR(rq);
goto out;
}
+
+ i915_request_put(rq);
}
}
- err = release_queue(outer, vma, n);
+ err = release_queue(outer, vma, n, INT_MAX);
if (err)
goto out;
@@ -393,7 +429,7 @@ out:
static int live_timeslice_preempt(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
void *vaddr;
@@ -409,11 +445,11 @@ static int live_timeslice_preempt(void *arg)
* ready task.
*/
- obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+ obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
if (IS_ERR(obj))
return PTR_ERR(obj);
- vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
+ vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
goto err_obj;
@@ -433,7 +469,7 @@ static int live_timeslice_preempt(void *arg)
struct intel_engine_cs *engine;
enum intel_engine_id id;
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
if (!intel_engine_has_preemption(engine))
continue;
@@ -443,7 +479,7 @@ static int live_timeslice_preempt(void *arg)
if (err)
goto err_pin;
- if (igt_flush_test(i915)) {
+ if (igt_flush_test(gt->i915)) {
err = -EIO;
goto err_pin;
}
@@ -459,9 +495,153 @@ err_obj:
return err;
}
+static struct i915_request *nop_request(struct intel_engine_cs *engine)
+{
+ struct i915_request *rq;
+
+ rq = i915_request_create(engine->kernel_context);
+ if (IS_ERR(rq))
+ return rq;
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ return rq;
+}
+
+static void wait_for_submit(struct intel_engine_cs *engine,
+ struct i915_request *rq)
+{
+ do {
+ cond_resched();
+ intel_engine_flush_submission(engine);
+ } while (!i915_request_is_active(rq));
+}
+
+static int live_timeslice_queue(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct drm_i915_gem_object *obj;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ struct i915_vma *vma;
+ void *vaddr;
+ int err = 0;
+
+ /*
+ * Make sure that even if ELSP[0] and ELSP[1] are both filled, with
+ * timeslicing between them disabled, we *do* enable timeslicing
+ * if the queue demands it. (Normally, we do not submit if ELSP[1]
+ * is already occupied, so we must rely on timeslicing to eject
+ * ELSP[0] in favour of the queue.)
+ */
+
+ obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto err_obj;
+ }
+
+ vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
+ if (IS_ERR(vaddr)) {
+ err = PTR_ERR(vaddr);
+ goto err_obj;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
+ if (err)
+ goto err_map;
+
+ for_each_engine(engine, gt, id) {
+ struct i915_sched_attr attr = {
+ .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
+ };
+ struct i915_request *rq, *nop;
+
+ if (!intel_engine_has_preemption(engine))
+ continue;
+
+ memset(vaddr, 0, PAGE_SIZE);
+
+ /* ELSP[0]: semaphore wait */
+ rq = semaphore_queue(engine, vma, 0);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_pin;
+ }
+ engine->schedule(rq, &attr);
+ wait_for_submit(engine, rq);
+
+ /* ELSP[1]: nop request */
+ nop = nop_request(engine);
+ if (IS_ERR(nop)) {
+ err = PTR_ERR(nop);
+ i915_request_put(rq);
+ goto err_pin;
+ }
+ wait_for_submit(engine, nop);
+ i915_request_put(nop);
+
+ GEM_BUG_ON(i915_request_completed(rq));
+ GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
+
+ /* Queue: semaphore signal, matching priority as semaphore */
+ err = release_queue(engine, vma, 1, effective_prio(rq));
+ if (err) {
+ i915_request_put(rq);
+ goto err_pin;
+ }
+
+ intel_engine_flush_submission(engine);
+ if (!READ_ONCE(engine->execlists.timer.expires) &&
+ !i915_request_completed(rq)) {
+ struct drm_printer p =
+ drm_info_printer(gt->i915->drm.dev);
+
+ GEM_TRACE_ERR("%s: Failed to enable timeslicing!\n",
+ engine->name);
+ intel_engine_dump(engine, &p,
+ "%s\n", engine->name);
+ GEM_TRACE_DUMP();
+
+ memset(vaddr, 0xff, PAGE_SIZE);
+ err = -EINVAL;
+ }
+
+ /* Timeslice every jiffie, so within 2 we should signal */
+ if (i915_request_wait(rq, 0, 3) < 0) {
+ struct drm_printer p =
+ drm_info_printer(gt->i915->drm.dev);
+
+ pr_err("%s: Failed to timeslice into queue\n",
+ engine->name);
+ intel_engine_dump(engine, &p,
+ "%s\n", engine->name);
+
+ memset(vaddr, 0xff, PAGE_SIZE);
+ err = -EIO;
+ }
+ i915_request_put(rq);
+ if (err)
+ break;
+ }
+
+err_pin:
+ i915_vma_unpin(vma);
+err_map:
+ i915_gem_object_unpin_map(obj);
+err_obj:
+ i915_gem_object_put(obj);
+ return err;
+}
+
static int live_busywait_preempt(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct i915_gem_context *ctx_hi, *ctx_lo;
struct intel_engine_cs *engine;
struct drm_i915_gem_object *obj;
@@ -475,19 +655,19 @@ static int live_busywait_preempt(void *arg)
* preempt the busywaits used to synchronise between rings.
*/
- ctx_hi = kernel_context(i915);
+ ctx_hi = kernel_context(gt->i915);
if (!ctx_hi)
return -ENOMEM;
ctx_hi->sched.priority =
I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
- ctx_lo = kernel_context(i915);
+ ctx_lo = kernel_context(gt->i915);
if (!ctx_lo)
goto err_ctx_hi;
ctx_lo->sched.priority =
I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
- obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+ obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
goto err_ctx_lo;
@@ -499,7 +679,7 @@ static int live_busywait_preempt(void *arg)
goto err_obj;
}
- vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
+ vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
goto err_map;
@@ -509,7 +689,7 @@ static int live_busywait_preempt(void *arg)
if (err)
goto err_map;
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
struct i915_request *lo, *hi;
struct igt_live_test t;
u32 *cs;
@@ -520,7 +700,7 @@ static int live_busywait_preempt(void *arg)
if (!intel_engine_can_store_dword(engine))
continue;
- if (igt_live_test_begin(&t, i915, __func__, engine->name)) {
+ if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
err = -EIO;
goto err_vma;
}
@@ -600,7 +780,7 @@ static int live_busywait_preempt(void *arg)
i915_request_add(hi);
if (i915_request_wait(lo, 0, HZ / 5) < 0) {
- struct drm_printer p = drm_info_printer(i915->drm.dev);
+ struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
pr_err("%s: Failed to preempt semaphore busywait!\n",
engine->name);
@@ -608,7 +788,7 @@ static int live_busywait_preempt(void *arg)
intel_engine_dump(engine, &p, "%s\n", engine->name);
GEM_TRACE_DUMP();
- intel_gt_set_wedged(&i915->gt);
+ intel_gt_set_wedged(gt);
err = -EIO;
goto err_vma;
}
@@ -654,45 +834,45 @@ spinner_create_request(struct igt_spinner *spin,
static int live_preempt(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct i915_gem_context *ctx_hi, *ctx_lo;
struct igt_spinner spin_hi, spin_lo;
struct intel_engine_cs *engine;
enum intel_engine_id id;
int err = -ENOMEM;
- if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+ if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
return 0;
- if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
+ if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
pr_err("Logical preemption supported, but not exposed\n");
- if (igt_spinner_init(&spin_hi, &i915->gt))
+ if (igt_spinner_init(&spin_hi, gt))
return -ENOMEM;
- if (igt_spinner_init(&spin_lo, &i915->gt))
+ if (igt_spinner_init(&spin_lo, gt))
goto err_spin_hi;
- ctx_hi = kernel_context(i915);
+ ctx_hi = kernel_context(gt->i915);
if (!ctx_hi)
goto err_spin_lo;
ctx_hi->sched.priority =
I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
- ctx_lo = kernel_context(i915);
+ ctx_lo = kernel_context(gt->i915);
if (!ctx_lo)
goto err_ctx_hi;
ctx_lo->sched.priority =
I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
struct igt_live_test t;
struct i915_request *rq;
if (!intel_engine_has_preemption(engine))
continue;
- if (igt_live_test_begin(&t, i915, __func__, engine->name)) {
+ if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
err = -EIO;
goto err_ctx_lo;
}
@@ -708,7 +888,7 @@ static int live_preempt(void *arg)
if (!igt_wait_for_spinner(&spin_lo, rq)) {
GEM_TRACE("lo spinner failed to start\n");
GEM_TRACE_DUMP();
- intel_gt_set_wedged(&i915->gt);
+ intel_gt_set_wedged(gt);
err = -EIO;
goto err_ctx_lo;
}
@@ -725,7 +905,7 @@ static int live_preempt(void *arg)
if (!igt_wait_for_spinner(&spin_hi, rq)) {
GEM_TRACE("hi spinner failed to start\n");
GEM_TRACE_DUMP();
- intel_gt_set_wedged(&i915->gt);
+ intel_gt_set_wedged(gt);
err = -EIO;
goto err_ctx_lo;
}
@@ -753,7 +933,7 @@ err_spin_hi:
static int live_late_preempt(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct i915_gem_context *ctx_hi, *ctx_lo;
struct igt_spinner spin_hi, spin_lo;
struct intel_engine_cs *engine;
@@ -761,34 +941,34 @@ static int live_late_preempt(void *arg)
enum intel_engine_id id;
int err = -ENOMEM;
- if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+ if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
return 0;
- if (igt_spinner_init(&spin_hi, &i915->gt))
+ if (igt_spinner_init(&spin_hi, gt))
return -ENOMEM;
- if (igt_spinner_init(&spin_lo, &i915->gt))
+ if (igt_spinner_init(&spin_lo, gt))
goto err_spin_hi;
- ctx_hi = kernel_context(i915);
+ ctx_hi = kernel_context(gt->i915);
if (!ctx_hi)
goto err_spin_lo;
- ctx_lo = kernel_context(i915);
+ ctx_lo = kernel_context(gt->i915);
if (!ctx_lo)
goto err_ctx_hi;
/* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
ctx_lo->sched.priority = I915_USER_PRIORITY(1);
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
struct igt_live_test t;
struct i915_request *rq;
if (!intel_engine_has_preemption(engine))
continue;
- if (igt_live_test_begin(&t, i915, __func__, engine->name)) {
+ if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
err = -EIO;
goto err_ctx_lo;
}
@@ -852,7 +1032,7 @@ err_spin_hi:
err_wedged:
igt_spinner_end(&spin_hi);
igt_spinner_end(&spin_lo);
- intel_gt_set_wedged(&i915->gt);
+ intel_gt_set_wedged(gt);
err = -EIO;
goto err_ctx_lo;
}
@@ -862,14 +1042,13 @@ struct preempt_client {
struct i915_gem_context *ctx;
};
-static int preempt_client_init(struct drm_i915_private *i915,
- struct preempt_client *c)
+static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
{
- c->ctx = kernel_context(i915);
+ c->ctx = kernel_context(gt->i915);
if (!c->ctx)
return -ENOMEM;
- if (igt_spinner_init(&c->spin, &i915->gt))
+ if (igt_spinner_init(&c->spin, gt))
goto err_ctx;
return 0;
@@ -887,7 +1066,7 @@ static void preempt_client_fini(struct preempt_client *c)
static int live_nopreempt(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
struct preempt_client a, b;
enum intel_engine_id id;
@@ -898,16 +1077,16 @@ static int live_nopreempt(void *arg)
* that may be being observed and not want to be interrupted.
*/
- if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+ if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
return 0;
- if (preempt_client_init(i915, &a))
+ if (preempt_client_init(gt, &a))
return -ENOMEM;
- if (preempt_client_init(i915, &b))
+ if (preempt_client_init(gt, &b))
goto err_client_a;
b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
struct i915_request *rq_a, *rq_b;
if (!intel_engine_has_preemption(engine))
@@ -967,7 +1146,7 @@ static int live_nopreempt(void *arg)
goto err_wedged;
}
- if (igt_flush_test(i915))
+ if (igt_flush_test(gt->i915))
goto err_wedged;
}
@@ -981,14 +1160,14 @@ err_client_a:
err_wedged:
igt_spinner_end(&b.spin);
igt_spinner_end(&a.spin);
- intel_gt_set_wedged(&i915->gt);
+ intel_gt_set_wedged(gt);
err = -EIO;
goto err_client_b;
}
static int live_suppress_self_preempt(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
struct i915_sched_attr attr = {
.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX)
@@ -1004,28 +1183,28 @@ static int live_suppress_self_preempt(void *arg)
* completion event.
*/
- if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+ if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
return 0;
- if (USES_GUC_SUBMISSION(i915))
+ if (USES_GUC_SUBMISSION(gt->i915))
return 0; /* presume black box */
- if (intel_vgpu_active(i915))
+ if (intel_vgpu_active(gt->i915))
return 0; /* GVT forces single port & request submission */
- if (preempt_client_init(i915, &a))
+ if (preempt_client_init(gt, &a))
return -ENOMEM;
- if (preempt_client_init(i915, &b))
+ if (preempt_client_init(gt, &b))
goto err_client_a;
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
struct i915_request *rq_a, *rq_b;
int depth;
if (!intel_engine_has_preemption(engine))
continue;
- if (igt_flush_test(i915))
+ if (igt_flush_test(gt->i915))
goto err_wedged;
intel_engine_pm_get(engine);
@@ -1086,7 +1265,7 @@ static int live_suppress_self_preempt(void *arg)
}
intel_engine_pm_put(engine);
- if (igt_flush_test(i915))
+ if (igt_flush_test(gt->i915))
goto err_wedged;
}
@@ -1100,7 +1279,7 @@ err_client_a:
err_wedged:
igt_spinner_end(&b.spin);
igt_spinner_end(&a.spin);
- intel_gt_set_wedged(&i915->gt);
+ intel_gt_set_wedged(gt);
err = -EIO;
goto err_client_b;
}
@@ -1160,7 +1339,7 @@ static void dummy_request_free(struct i915_request *dummy)
static int live_suppress_wait_preempt(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct preempt_client client[4];
struct intel_engine_cs *engine;
enum intel_engine_id id;
@@ -1173,19 +1352,19 @@ static int live_suppress_wait_preempt(void *arg)
* not needlessly generate preempt-to-idle cycles.
*/
- if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+ if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
return 0;
- if (preempt_client_init(i915, &client[0])) /* ELSP[0] */
+ if (preempt_client_init(gt, &client[0])) /* ELSP[0] */
return -ENOMEM;
- if (preempt_client_init(i915, &client[1])) /* ELSP[1] */
+ if (preempt_client_init(gt, &client[1])) /* ELSP[1] */
goto err_client_0;
- if (preempt_client_init(i915, &client[2])) /* head of queue */
+ if (preempt_client_init(gt, &client[2])) /* head of queue */
goto err_client_1;
- if (preempt_client_init(i915, &client[3])) /* bystander */
+ if (preempt_client_init(gt, &client[3])) /* bystander */
goto err_client_2;
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
int depth;
if (!intel_engine_has_preemption(engine))
@@ -1240,7 +1419,7 @@ static int live_suppress_wait_preempt(void *arg)
for (i = 0; i < ARRAY_SIZE(client); i++)
igt_spinner_end(&client[i].spin);
- if (igt_flush_test(i915))
+ if (igt_flush_test(gt->i915))
goto err_wedged;
if (engine->execlists.preempt_hang.count) {
@@ -1268,14 +1447,14 @@ err_client_0:
err_wedged:
for (i = 0; i < ARRAY_SIZE(client); i++)
igt_spinner_end(&client[i].spin);
- intel_gt_set_wedged(&i915->gt);
+ intel_gt_set_wedged(gt);
err = -EIO;
goto err_client_3;
}
static int live_chain_preempt(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
struct preempt_client hi, lo;
enum intel_engine_id id;
@@ -1287,16 +1466,16 @@ static int live_chain_preempt(void *arg)
* the previously submitted spinner in B.
*/
- if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+ if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
return 0;
- if (preempt_client_init(i915, &hi))
+ if (preempt_client_init(gt, &hi))
return -ENOMEM;
- if (preempt_client_init(i915, &lo))
+ if (preempt_client_init(gt, &lo))
goto err_client_hi;
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
struct i915_sched_attr attr = {
.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
};
@@ -1327,7 +1506,7 @@ static int live_chain_preempt(void *arg)
goto err_wedged;
}
- if (igt_live_test_begin(&t, i915, __func__, engine->name)) {
+ if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
err = -EIO;
goto err_wedged;
}
@@ -1365,7 +1544,7 @@ static int live_chain_preempt(void *arg)
igt_spinner_end(&hi.spin);
if (i915_request_wait(rq, 0, HZ / 5) < 0) {
struct drm_printer p =
- drm_info_printer(i915->drm.dev);
+ drm_info_printer(gt->i915->drm.dev);
pr_err("Failed to preempt over chain of %d\n",
count);
@@ -1381,7 +1560,7 @@ static int live_chain_preempt(void *arg)
i915_request_add(rq);
if (i915_request_wait(rq, 0, HZ / 5) < 0) {
struct drm_printer p =
- drm_info_printer(i915->drm.dev);
+ drm_info_printer(gt->i915->drm.dev);
pr_err("Failed to flush low priority chain of %d requests\n",
count);
@@ -1407,45 +1586,45 @@ err_client_hi:
err_wedged:
igt_spinner_end(&hi.spin);
igt_spinner_end(&lo.spin);
- intel_gt_set_wedged(&i915->gt);
+ intel_gt_set_wedged(gt);
err = -EIO;
goto err_client_lo;
}
static int live_preempt_hang(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct i915_gem_context *ctx_hi, *ctx_lo;
struct igt_spinner spin_hi, spin_lo;
struct intel_engine_cs *engine;
enum intel_engine_id id;
int err = -ENOMEM;
- if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+ if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
return 0;
- if (!intel_has_reset_engine(&i915->gt))
+ if (!intel_has_reset_engine(gt))
return 0;
- if (igt_spinner_init(&spin_hi, &i915->gt))
+ if (igt_spinner_init(&spin_hi, gt))
return -ENOMEM;
- if (igt_spinner_init(&spin_lo, &i915->gt))
+ if (igt_spinner_init(&spin_lo, gt))
goto err_spin_hi;
- ctx_hi = kernel_context(i915);
+ ctx_hi = kernel_context(gt->i915);
if (!ctx_hi)
goto err_spin_lo;
ctx_hi->sched.priority =
I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
- ctx_lo = kernel_context(i915);
+ ctx_lo = kernel_context(gt->i915);
if (!ctx_lo)
goto err_ctx_hi;
ctx_lo->sched.priority =
I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
struct i915_request *rq;
if (!intel_engine_has_preemption(engine))
@@ -1462,7 +1641,7 @@ static int live_preempt_hang(void *arg)
if (!igt_wait_for_spinner(&spin_lo, rq)) {
GEM_TRACE("lo spinner failed to start\n");
GEM_TRACE_DUMP();
- intel_gt_set_wedged(&i915->gt);
+ intel_gt_set_wedged(gt);
err = -EIO;
goto err_ctx_lo;
}
@@ -1484,28 +1663,28 @@ static int live_preempt_hang(void *arg)
HZ / 10)) {
pr_err("Preemption did not occur within timeout!");
GEM_TRACE_DUMP();
- intel_gt_set_wedged(&i915->gt);
+ intel_gt_set_wedged(gt);
err = -EIO;
goto err_ctx_lo;
}
- set_bit(I915_RESET_ENGINE + id, &i915->gt.reset.flags);
+ set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
intel_engine_reset(engine, NULL);
- clear_bit(I915_RESET_ENGINE + id, &i915->gt.reset.flags);
+ clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
engine->execlists.preempt_hang.inject_hang = false;
if (!igt_wait_for_spinner(&spin_hi, rq)) {
GEM_TRACE("hi spinner failed to start\n");
GEM_TRACE_DUMP();
- intel_gt_set_wedged(&i915->gt);
+ intel_gt_set_wedged(gt);
err = -EIO;
goto err_ctx_lo;
}
igt_spinner_end(&spin_hi);
igt_spinner_end(&spin_lo);
- if (igt_flush_test(i915)) {
+ if (igt_flush_test(gt->i915)) {
err = -EIO;
goto err_ctx_lo;
}
@@ -1534,7 +1713,7 @@ static int random_priority(struct rnd_state *rnd)
}
struct preempt_smoke {
- struct drm_i915_private *i915;
+ struct intel_gt *gt;
struct i915_gem_context **contexts;
struct intel_engine_cs *engine;
struct drm_i915_gem_object *batch;
@@ -1634,7 +1813,7 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
unsigned long count;
int err = 0;
- for_each_engine(engine, smoke->i915, id) {
+ for_each_engine(engine, smoke->gt, id) {
arg[id] = *smoke;
arg[id].engine = engine;
if (!(flags & BATCH))
@@ -1651,7 +1830,7 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
}
count = 0;
- for_each_engine(engine, smoke->i915, id) {
+ for_each_engine(engine, smoke->gt, id) {
int status;
if (IS_ERR_OR_NULL(tsk[id]))
@@ -1668,7 +1847,7 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
count, flags,
- RUNTIME_INFO(smoke->i915)->num_engines, smoke->ncontext);
+ RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
return 0;
}
@@ -1680,7 +1859,7 @@ static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
count = 0;
do {
- for_each_engine(smoke->engine, smoke->i915, id) {
+ for_each_engine(smoke->engine, smoke->gt, id) {
struct i915_gem_context *ctx = smoke_context(smoke);
int err;
@@ -1696,14 +1875,14 @@ static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
count, flags,
- RUNTIME_INFO(smoke->i915)->num_engines, smoke->ncontext);
+ RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
return 0;
}
static int live_preempt_smoke(void *arg)
{
struct preempt_smoke smoke = {
- .i915 = arg,
+ .gt = arg,
.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
.ncontext = 1024,
};
@@ -1713,7 +1892,7 @@ static int live_preempt_smoke(void *arg)
u32 *cs;
int n;
- if (!HAS_LOGICAL_RING_PREEMPTION(smoke.i915))
+ if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915))
return 0;
smoke.contexts = kmalloc_array(smoke.ncontext,
@@ -1722,7 +1901,8 @@ static int live_preempt_smoke(void *arg)
if (!smoke.contexts)
return -ENOMEM;
- smoke.batch = i915_gem_object_create_internal(smoke.i915, PAGE_SIZE);
+ smoke.batch =
+ i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
if (IS_ERR(smoke.batch)) {
err = PTR_ERR(smoke.batch);
goto err_free;
@@ -1739,13 +1919,13 @@ static int live_preempt_smoke(void *arg)
i915_gem_object_flush_map(smoke.batch);
i915_gem_object_unpin_map(smoke.batch);
- if (igt_live_test_begin(&t, smoke.i915, __func__, "all")) {
+ if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
err = -EIO;
goto err_batch;
}
for (n = 0; n < smoke.ncontext; n++) {
- smoke.contexts[n] = kernel_context(smoke.i915);
+ smoke.contexts[n] = kernel_context(smoke.gt->i915);
if (!smoke.contexts[n])
goto err_ctx;
}
@@ -1778,7 +1958,7 @@ err_free:
return err;
}
-static int nop_virtual_engine(struct drm_i915_private *i915,
+static int nop_virtual_engine(struct intel_gt *gt,
struct intel_engine_cs **siblings,
unsigned int nsibling,
unsigned int nctx,
@@ -1797,7 +1977,7 @@ static int nop_virtual_engine(struct drm_i915_private *i915,
GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ctx));
for (n = 0; n < nctx; n++) {
- ctx[n] = kernel_context(i915);
+ ctx[n] = kernel_context(gt->i915);
if (!ctx[n]) {
err = -ENOMEM;
nctx = n;
@@ -1822,7 +2002,7 @@ static int nop_virtual_engine(struct drm_i915_private *i915,
}
}
- err = igt_live_test_begin(&t, i915, __func__, ve[0]->engine->name);
+ err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
if (err)
goto out;
@@ -1869,7 +2049,7 @@ static int nop_virtual_engine(struct drm_i915_private *i915,
request[nc]->fence.context,
request[nc]->fence.seqno);
GEM_TRACE_DUMP();
- intel_gt_set_wedged(&i915->gt);
+ intel_gt_set_wedged(gt);
break;
}
}
@@ -1891,7 +2071,7 @@ static int nop_virtual_engine(struct drm_i915_private *i915,
prime, div64_u64(ktime_to_ns(times[1]), prime));
out:
- if (igt_flush_test(i915))
+ if (igt_flush_test(gt->i915))
err = -EIO;
for (nc = 0; nc < nctx; nc++) {
@@ -1904,19 +2084,18 @@ out:
static int live_virtual_engine(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
struct intel_engine_cs *engine;
- struct intel_gt *gt = &i915->gt;
enum intel_engine_id id;
unsigned int class, inst;
int err;
- if (USES_GUC_SUBMISSION(i915))
+ if (USES_GUC_SUBMISSION(gt->i915))
return 0;
- for_each_engine(engine, i915, id) {
- err = nop_virtual_engine(i915, &engine, 1, 1, 0);
+ for_each_engine(engine, gt, id) {
+ err = nop_virtual_engine(gt, &engine, 1, 1, 0);
if (err) {
pr_err("Failed to wrap engine %s: err=%d\n",
engine->name, err);
@@ -1938,13 +2117,13 @@ static int live_virtual_engine(void *arg)
continue;
for (n = 1; n <= nsibling + 1; n++) {
- err = nop_virtual_engine(i915, siblings, nsibling,
+ err = nop_virtual_engine(gt, siblings, nsibling,
n, 0);
if (err)
return err;
}
- err = nop_virtual_engine(i915, siblings, nsibling, n, CHAIN);
+ err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
if (err)
return err;
}
@@ -1952,7 +2131,7 @@ static int live_virtual_engine(void *arg)
return 0;
}
-static int mask_virtual_engine(struct drm_i915_private *i915,
+static int mask_virtual_engine(struct intel_gt *gt,
struct intel_engine_cs **siblings,
unsigned int nsibling)
{
@@ -1968,7 +2147,7 @@ static int mask_virtual_engine(struct drm_i915_private *i915,
* restrict it to our desired engine within the virtual engine.
*/
- ctx = kernel_context(i915);
+ ctx = kernel_context(gt->i915);
if (!ctx)
return -ENOMEM;
@@ -1982,7 +2161,7 @@ static int mask_virtual_engine(struct drm_i915_private *i915,
if (err)
goto out_put;
- err = igt_live_test_begin(&t, i915, __func__, ve->engine->name);
+ err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
if (err)
goto out_unpin;
@@ -2013,7 +2192,7 @@ static int mask_virtual_engine(struct drm_i915_private *i915,
request[n]->fence.context,
request[n]->fence.seqno);
GEM_TRACE_DUMP();
- intel_gt_set_wedged(&i915->gt);
+ intel_gt_set_wedged(gt);
err = -EIO;
goto out;
}
@@ -2029,7 +2208,7 @@ static int mask_virtual_engine(struct drm_i915_private *i915,
err = igt_live_test_end(&t);
out:
- if (igt_flush_test(i915))
+ if (igt_flush_test(gt->i915))
err = -EIO;
for (n = 0; n < nsibling; n++)
@@ -2046,13 +2225,12 @@ out_close:
static int live_virtual_mask(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
- struct intel_gt *gt = &i915->gt;
unsigned int class, inst;
int err;
- if (USES_GUC_SUBMISSION(i915))
+ if (USES_GUC_SUBMISSION(gt->i915))
return 0;
for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
@@ -2068,7 +2246,7 @@ static int live_virtual_mask(void *arg)
if (nsibling < 2)
continue;
- err = mask_virtual_engine(i915, siblings, nsibling);
+ err = mask_virtual_engine(gt, siblings, nsibling);
if (err)
return err;
}
@@ -2076,7 +2254,158 @@ static int live_virtual_mask(void *arg)
return 0;
}
-static int bond_virtual_engine(struct drm_i915_private *i915,
+static int preserved_virtual_engine(struct intel_gt *gt,
+ struct intel_engine_cs **siblings,
+ unsigned int nsibling)
+{
+ struct i915_request *last = NULL;
+ struct i915_gem_context *ctx;
+ struct intel_context *ve;
+ struct i915_vma *scratch;
+ struct igt_live_test t;
+ unsigned int n;
+ int err = 0;
+ u32 *cs;
+
+ ctx = kernel_context(gt->i915);
+ if (!ctx)
+ return -ENOMEM;
+
+ scratch = create_scratch(siblings[0]->gt);
+ if (IS_ERR(scratch)) {
+ err = PTR_ERR(scratch);
+ goto out_close;
+ }
+
+ ve = intel_execlists_create_virtual(ctx, siblings, nsibling);
+ if (IS_ERR(ve)) {
+ err = PTR_ERR(ve);
+ goto out_scratch;
+ }
+
+ err = intel_context_pin(ve);
+ if (err)
+ goto out_put;
+
+ err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
+ if (err)
+ goto out_unpin;
+
+ for (n = 0; n < NUM_GPR_DW; n++) {
+ struct intel_engine_cs *engine = siblings[n % nsibling];
+ struct i915_request *rq;
+
+ rq = i915_request_create(ve);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out_end;
+ }
+
+ i915_request_put(last);
+ last = i915_request_get(rq);
+
+ cs = intel_ring_begin(rq, 8);
+ if (IS_ERR(cs)) {
+ i915_request_add(rq);
+ err = PTR_ERR(cs);
+ goto out_end;
+ }
+
+ *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+ *cs++ = CS_GPR(engine, n);
+ *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
+ *cs++ = 0;
+
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
+ *cs++ = n + 1;
+
+ *cs++ = MI_NOOP;
+ intel_ring_advance(rq, cs);
+
+ /* Restrict this request to run on a particular engine */
+ rq->execution_mask = engine->mask;
+ i915_request_add(rq);
+ }
+
+ if (i915_request_wait(last, 0, HZ / 5) < 0) {
+ err = -ETIME;
+ goto out_end;
+ }
+
+ cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ goto out_end;
+ }
+
+ for (n = 0; n < NUM_GPR_DW; n++) {
+ if (cs[n] != n) {
+ pr_err("Incorrect value[%d] found for GPR[%d]\n",
+ cs[n], n);
+ err = -EINVAL;
+ break;
+ }
+ }
+
+ i915_gem_object_unpin_map(scratch->obj);
+
+out_end:
+ if (igt_live_test_end(&t))
+ err = -EIO;
+ i915_request_put(last);
+out_unpin:
+ intel_context_unpin(ve);
+out_put:
+ intel_context_put(ve);
+out_scratch:
+ i915_vma_unpin_and_release(&scratch, 0);
+out_close:
+ kernel_context_close(ctx);
+ return err;
+}
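To follow the verification in preserved_virtual_engine(): request n is pinned to sibling n % nsibling, stores CS_GPR(engine, n) into scratch dword n, and then loads n + 1 into the next GPR dword. Request n - 1 therefore left the value n in GPR dword n (and the test relies on the GPRs starting out as zero in a fresh context), so if the virtual engine's context image really carries the GPRs across engine switches, scratch dword n must read back exactly n. A hypothetical restatement of the final check as a standalone helper:

static int check_preserved_gprs(const u32 *cs, unsigned int count)
{
	unsigned int n;

	for (n = 0; n < count; n++)
		if (cs[n] != n)	/* value written by request n - 1 was lost */
			return -EINVAL;

	return 0;
}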
+
+static int live_virtual_preserved(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
+ unsigned int class, inst;
+
+ /*
+ * Check that the context image retains non-privileged (user) registers
+ * from one engine to the next. For this we check that the CS_GPR
+ * registers are preserved.
+ */
+
+ if (USES_GUC_SUBMISSION(gt->i915))
+ return 0;
+
+ /* As we use CS_GPR, we cannot run on gens before they existed on all engines. */
+ if (INTEL_GEN(gt->i915) < 9)
+ return 0;
+
+ for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
+ int nsibling, err;
+
+ nsibling = 0;
+ for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
+ if (!gt->engine_class[class][inst])
+ continue;
+
+ siblings[nsibling++] = gt->engine_class[class][inst];
+ }
+ if (nsibling < 2)
+ continue;
+
+ err = preserved_virtual_engine(gt, siblings, nsibling);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int bond_virtual_engine(struct intel_gt *gt,
unsigned int class,
struct intel_engine_cs **siblings,
unsigned int nsibling,
@@ -2092,13 +2421,13 @@ static int bond_virtual_engine(struct drm_i915_private *i915,
GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
- ctx = kernel_context(i915);
+ ctx = kernel_context(gt->i915);
if (!ctx)
return -ENOMEM;
err = 0;
rq[0] = ERR_PTR(-ENOMEM);
- for_each_engine(master, i915, id) {
+ for_each_engine(master, gt, id) {
struct i915_sw_fence fence = {};
if (master->class == class)
@@ -2203,7 +2532,7 @@ static int bond_virtual_engine(struct drm_i915_private *i915,
out:
for (n = 0; !IS_ERR(rq[n]); n++)
i915_request_put(rq[n]);
- if (igt_flush_test(i915))
+ if (igt_flush_test(gt->i915))
err = -EIO;
kernel_context_close(ctx);
@@ -2220,13 +2549,12 @@ static int live_virtual_bond(void *arg)
{ "schedule", BOND_SCHEDULE },
{ },
};
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
- struct intel_gt *gt = &i915->gt;
unsigned int class, inst;
int err;
- if (USES_GUC_SUBMISSION(i915))
+ if (USES_GUC_SUBMISSION(gt->i915))
return 0;
for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
@@ -2245,7 +2573,7 @@ static int live_virtual_bond(void *arg)
continue;
for (p = phases; p->name; p++) {
- err = bond_virtual_engine(i915,
+ err = bond_virtual_engine(gt,
class, siblings, nsibling,
p->flags);
if (err) {
@@ -2266,6 +2594,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_unlite_switch),
SUBTEST(live_unlite_preempt),
SUBTEST(live_timeslice_preempt),
+ SUBTEST(live_timeslice_queue),
SUBTEST(live_busywait_preempt),
SUBTEST(live_preempt),
SUBTEST(live_late_preempt),
@@ -2277,6 +2606,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_preempt_smoke),
SUBTEST(live_virtual_engine),
SUBTEST(live_virtual_mask),
+ SUBTEST(live_virtual_preserved),
SUBTEST(live_virtual_bond),
};
@@ -2286,7 +2616,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
if (intel_gt_is_wedged(&i915->gt))
return 0;
- return i915_live_subtests(tests, i915);
+ return intel_gt_live_subtests(tests, &i915->gt);
}
static void hexdump(const void *buf, size_t len)
@@ -2336,7 +2666,7 @@ static int live_lrc_layout(void *arg)
return -ENOMEM;
err = 0;
- for_each_engine(engine, gt->i915, id) {
+ for_each_engine(engine, gt, id) {
u32 *hw, *lrc;
int dw;
@@ -2419,10 +2749,280 @@ static int live_lrc_layout(void *arg)
return err;
}
+static int __live_lrc_state(struct i915_gem_context *fixme,
+ struct intel_engine_cs *engine,
+ struct i915_vma *scratch)
+{
+ struct intel_context *ce;
+ struct i915_request *rq;
+ enum {
+ RING_START_IDX = 0,
+ RING_TAIL_IDX,
+ MAX_IDX
+ };
+ u32 expected[MAX_IDX];
+ u32 *cs;
+ int err;
+ int n;
+
+ ce = intel_context_create(fixme, engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
+
+ err = intel_context_pin(ce);
+ if (err)
+ goto err_put;
+
+ rq = i915_request_create(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_unpin;
+ }
+
+ cs = intel_ring_begin(rq, 4 * MAX_IDX);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ i915_request_add(rq);
+ goto err_unpin;
+ }
+
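+ /* Record RING_START as seen by the CS while this context is active */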
+ *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+ *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
+ *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
+ *cs++ = 0;
+
+ expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);
+
+ *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+ *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
+ *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
+ *cs++ = 0;
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+
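+ /* Ensure the request has reached the backend before sampling the tail */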
+ intel_engine_flush_submission(engine);
+ expected[RING_TAIL_IDX] = ce->ring->tail;
+
+ if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+ err = -ETIME;
+ goto err_rq;
+ }
+
+ cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ goto err_rq;
+ }
+
+ for (n = 0; n < MAX_IDX; n++) {
+ if (cs[n] != expected[n]) {
+ pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
+ engine->name, n, cs[n], expected[n]);
+ err = -EINVAL;
+ break;
+ }
+ }
+
+ i915_gem_object_unpin_map(scratch->obj);
+
+err_rq:
+ i915_request_put(rq);
+err_unpin:
+ intel_context_unpin(ce);
+err_put:
+ intel_context_put(ce);
+ return err;
+}
+
+static int live_lrc_state(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ struct i915_gem_context *fixme;
+ struct i915_vma *scratch;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /*
+ * Check the live register state matches what we expect for this
+ * intel_context.
+ */
+
+ fixme = kernel_context(gt->i915);
+ if (!fixme)
+ return -ENOMEM;
+
+ scratch = create_scratch(gt);
+ if (IS_ERR(scratch)) {
+ err = PTR_ERR(scratch);
+ goto out_close;
+ }
+
+ for_each_engine(engine, gt, id) {
+ err = __live_lrc_state(fixme, engine, scratch);
+ if (err)
+ break;
+ }
+
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+
+ i915_vma_unpin_and_release(&scratch, 0);
+out_close:
+ kernel_context_close(fixme);
+ return err;
+}
+
+static int gpr_make_dirty(struct intel_engine_cs *engine)
+{
+ struct i915_request *rq;
+ u32 *cs;
+ int n;
+
+ rq = i915_request_create(engine->kernel_context);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
+ if (IS_ERR(cs)) {
+ i915_request_add(rq);
+ return PTR_ERR(cs);
+ }
+
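+ /* Fill every CS_GPR with a non-zero poison value from the kernel context */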
+ *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
+ for (n = 0; n < NUM_GPR_DW; n++) {
+ *cs++ = CS_GPR(engine, n);
+ *cs++ = STACK_MAGIC;
+ }
+ *cs++ = MI_NOOP;
+
+ intel_ring_advance(rq, cs);
+ i915_request_add(rq);
+
+ return 0;
+}
+
+static int __live_gpr_clear(struct i915_gem_context *fixme,
+ struct intel_engine_cs *engine,
+ struct i915_vma *scratch)
+{
+ struct intel_context *ce;
+ struct i915_request *rq;
+ u32 *cs;
+ int err;
+ int n;
+
+ if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS)
+ return 0; /* GPR only on rcs0 for gen8 */
+
+ err = gpr_make_dirty(engine);
+ if (err)
+ return err;
+
+ ce = intel_context_create(fixme, engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_put;
+ }
+
+ cs = intel_ring_begin(rq, 4 * NUM_GPR_DW);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ i915_request_add(rq);
+ goto err_put;
+ }
+
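+ /* Read back every GPR; a newly created context should see them as zero */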
+ for (n = 0; n < NUM_GPR_DW; n++) {
+ *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+ *cs++ = CS_GPR(engine, n);
+ *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
+ *cs++ = 0;
+ }
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+ err = -ETIME;
+ goto err_rq;
+ }
+
+ cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ goto err_rq;
+ }
+
+ for (n = 0; n < NUM_GPR_DW; n++) {
+ if (cs[n]) {
+ pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
+ engine->name,
+ n / 2, n & 1 ? "udw" : "ldw",
+ cs[n]);
+ err = -EINVAL;
+ break;
+ }
+ }
+
+ i915_gem_object_unpin_map(scratch->obj);
+
+err_rq:
+ i915_request_put(rq);
+err_put:
+ intel_context_put(ce);
+ return err;
+}
+
+static int live_gpr_clear(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ struct i915_gem_context *fixme;
+ struct i915_vma *scratch;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /*
+ * Check that GPR registers are cleared in new contexts as we need
+ * to avoid leaking any information from previous contexts.
+ */
+
+ fixme = kernel_context(gt->i915);
+ if (!fixme)
+ return -ENOMEM;
+
+ scratch = create_scratch(gt);
+ if (IS_ERR(scratch)) {
+ err = PTR_ERR(scratch);
+ goto out_close;
+ }
+
+ for_each_engine(engine, gt, id) {
+ err = __live_gpr_clear(fixme, engine, scratch);
+ if (err)
+ break;
+ }
+
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+
+ i915_vma_unpin_and_release(&scratch, 0);
+out_close:
+ kernel_context_close(fixme);
+ return err;
+}
+
int intel_lrc_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(live_lrc_layout),
+ SUBTEST(live_lrc_state),
+ SUBTEST(live_gpr_clear),
};
if (!HAS_LOGICAL_RING_CONTEXTS(i915))
diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c
index d79482db7fe8..6efb9221b7fa 100644
--- a/drivers/gpu/drm/i915/gt/selftest_reset.c
+++ b/drivers/gpu/drm/i915/gt/selftest_reset.c
@@ -17,7 +17,7 @@ static int igt_global_reset(void *arg)
/* Check that we can issue a global GPU reset */
igt_global_reset_lock(gt);
- wakeref = intel_runtime_pm_get(&gt->i915->runtime_pm);
+ wakeref = intel_runtime_pm_get(gt->uncore->rpm);
reset_count = i915_reset_count(&gt->i915->gpu_error);
@@ -28,7 +28,7 @@ static int igt_global_reset(void *arg)
err = -EINVAL;
}
- intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
+ intel_runtime_pm_put(gt->uncore->rpm, wakeref);
igt_global_reset_unlock(gt);
if (intel_gt_is_wedged(gt))
@@ -45,14 +45,14 @@ static int igt_wedged_reset(void *arg)
/* Check that we can recover a wedged device with a GPU reset */
igt_global_reset_lock(gt);
- wakeref = intel_runtime_pm_get(&gt->i915->runtime_pm);
+ wakeref = intel_runtime_pm_get(gt->uncore->rpm);
intel_gt_set_wedged(gt);
GEM_BUG_ON(!intel_gt_is_wedged(gt));
intel_gt_reset(gt, ALL_ENGINES, NULL);
- intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
+ intel_runtime_pm_put(gt->uncore->rpm, wakeref);
igt_global_reset_unlock(gt);
return intel_gt_is_wedged(gt) ? -EIO : 0;
@@ -125,7 +125,7 @@ static int igt_atomic_engine_reset(void *arg)
if (!igt_force_reset(gt))
goto out_unlock;
- for_each_engine(engine, gt->i915, id) {
+ for_each_engine(engine, gt, id) {
tasklet_disable_nosync(&engine->execlists.tasklet);
intel_engine_pm_get(engine);
diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
index d6df40cdc8a6..dac86f699a4c 100644
--- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
@@ -35,7 +35,7 @@ static unsigned long hwsp_cacheline(struct intel_timeline *tl)
#define CACHELINES_PER_PAGE (PAGE_SIZE / CACHELINE_BYTES)
struct mock_hwsp_freelist {
- struct drm_i915_private *i915;
+ struct intel_gt *gt;
struct radix_tree_root cachelines;
struct intel_timeline **history;
unsigned long count, max;
@@ -68,7 +68,7 @@ static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state,
unsigned long cacheline;
int err;
- tl = intel_timeline_create(&state->i915->gt, NULL);
+ tl = intel_timeline_create(state->gt, NULL);
if (IS_ERR(tl))
return PTR_ERR(tl);
@@ -106,6 +106,7 @@ static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state,
static int mock_hwsp_freelist(void *arg)
{
struct mock_hwsp_freelist state;
+ struct drm_i915_private *i915;
const struct {
const char *name;
unsigned int flags;
@@ -117,12 +118,14 @@ static int mock_hwsp_freelist(void *arg)
unsigned int na;
int err = 0;
+ i915 = mock_gem_device();
+ if (!i915)
+ return -ENOMEM;
+
INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL);
state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed);
- state.i915 = mock_gem_device();
- if (!state.i915)
- return -ENOMEM;
+ state.gt = &i915->gt;
/*
* Create a bunch of timelines and check that their HWSP do not overlap.
@@ -151,7 +154,7 @@ out:
__mock_hwsp_record(&state, na, NULL);
kfree(state.history);
err_put:
- drm_dev_put(&state.i915->drm);
+ drm_dev_put(&i915->drm);
return err;
}
@@ -476,11 +479,11 @@ out:
}
static struct intel_timeline *
-checked_intel_timeline_create(struct drm_i915_private *i915)
+checked_intel_timeline_create(struct intel_gt *gt)
{
struct intel_timeline *tl;
- tl = intel_timeline_create(&i915->gt, NULL);
+ tl = intel_timeline_create(gt, NULL);
if (IS_ERR(tl))
return tl;
@@ -497,7 +500,7 @@ checked_intel_timeline_create(struct drm_i915_private *i915)
static int live_hwsp_engine(void *arg)
{
#define NUM_TIMELINES 4096
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct intel_timeline **timelines;
struct intel_engine_cs *engine;
enum intel_engine_id id;
@@ -516,7 +519,7 @@ static int live_hwsp_engine(void *arg)
return -ENOMEM;
count = 0;
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
if (!intel_engine_can_store_dword(engine))
continue;
@@ -526,7 +529,7 @@ static int live_hwsp_engine(void *arg)
struct intel_timeline *tl;
struct i915_request *rq;
- tl = checked_intel_timeline_create(i915);
+ tl = checked_intel_timeline_create(gt);
if (IS_ERR(tl)) {
err = PTR_ERR(tl);
break;
@@ -548,7 +551,7 @@ static int live_hwsp_engine(void *arg)
break;
}
- if (igt_flush_test(i915))
+ if (igt_flush_test(gt->i915))
err = -EIO;
for (n = 0; n < count; n++) {
@@ -570,7 +573,7 @@ static int live_hwsp_engine(void *arg)
static int live_hwsp_alternate(void *arg)
{
#define NUM_TIMELINES 4096
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct intel_timeline **timelines;
struct intel_engine_cs *engine;
enum intel_engine_id id;
@@ -591,14 +594,14 @@ static int live_hwsp_alternate(void *arg)
count = 0;
for (n = 0; n < NUM_TIMELINES; n++) {
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
struct intel_timeline *tl;
struct i915_request *rq;
if (!intel_engine_can_store_dword(engine))
continue;
- tl = checked_intel_timeline_create(i915);
+ tl = checked_intel_timeline_create(gt);
if (IS_ERR(tl)) {
intel_engine_pm_put(engine);
err = PTR_ERR(tl);
@@ -620,7 +623,7 @@ static int live_hwsp_alternate(void *arg)
}
out:
- if (igt_flush_test(i915))
+ if (igt_flush_test(gt->i915))
err = -EIO;
for (n = 0; n < count; n++) {
@@ -641,8 +644,7 @@ out:
static int live_hwsp_wrap(void *arg)
{
- struct drm_i915_private *i915 = arg;
- struct intel_gt *gt = &i915->gt;
+ struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
struct intel_timeline *tl;
enum intel_engine_id id;
@@ -664,7 +666,7 @@ static int live_hwsp_wrap(void *arg)
if (err)
goto out_free;
- for_each_engine(engine, gt->i915, id) {
+ for_each_engine(engine, gt, id) {
const u32 *hwsp_seqno[2];
struct i915_request *rq;
u32 seqno[2];
@@ -740,7 +742,7 @@ static int live_hwsp_wrap(void *arg)
}
out:
- if (igt_flush_test(i915))
+ if (igt_flush_test(gt->i915))
err = -EIO;
intel_timeline_unpin(tl);
@@ -751,7 +753,7 @@ out_free:
static int live_hwsp_recycle(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
enum intel_engine_id id;
unsigned long count;
@@ -764,7 +766,7 @@ static int live_hwsp_recycle(void *arg)
*/
count = 0;
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
IGT_TIMEOUT(end_time);
if (!intel_engine_can_store_dword(engine))
@@ -776,7 +778,7 @@ static int live_hwsp_recycle(void *arg)
struct intel_timeline *tl;
struct i915_request *rq;
- tl = checked_intel_timeline_create(i915);
+ tl = checked_intel_timeline_create(gt);
if (IS_ERR(tl)) {
err = PTR_ERR(tl);
break;
@@ -831,5 +833,5 @@ int intel_timeline_live_selftests(struct drm_i915_private *i915)
if (intel_gt_is_wedged(&i915->gt))
return 0;
- return i915_live_subtests(tests, i915);
+ return intel_gt_live_subtests(tests, &i915->gt);
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
index 95627e80f246..ef02920cec29 100644
--- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -33,8 +33,32 @@ struct wa_lists {
} engine[I915_NUM_ENGINES];
};
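+/*
+ * Submit the request and synchronously wait for it to complete, folding
+ * a timeout into the error code supplied by the caller.
+ */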
+static int request_add_sync(struct i915_request *rq, int err)
+{
+ i915_request_get(rq);
+ i915_request_add(rq);
+ if (i915_request_wait(rq, 0, HZ / 5) < 0)
+ err = -EIO;
+ i915_request_put(rq);
+
+ return err;
+}
+
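+/*
+ * Submit the request and, if a spinner was supplied, wait for it to start
+ * executing on the GPU before returning.
+ */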
+static int request_add_spin(struct i915_request *rq, struct igt_spinner *spin)
+{
+ int err = 0;
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+ if (spin && !igt_wait_for_spinner(spin, rq))
+ err = -ETIMEDOUT;
+ i915_request_put(rq);
+
+ return err;
+}
+
static void
-reference_lists_init(struct drm_i915_private *i915, struct wa_lists *lists)
+reference_lists_init(struct intel_gt *gt, struct wa_lists *lists)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
@@ -42,10 +66,10 @@ reference_lists_init(struct drm_i915_private *i915, struct wa_lists *lists)
memset(lists, 0, sizeof(*lists));
wa_init_start(&lists->gt_wa_list, "GT_REF", "global");
- gt_init_workarounds(i915, &lists->gt_wa_list);
+ gt_init_workarounds(gt->i915, &lists->gt_wa_list);
wa_init_finish(&lists->gt_wa_list);
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
struct i915_wa_list *wal = &lists->engine[id].wa_list;
wa_init_start(wal, "REF", engine->name);
@@ -59,12 +83,12 @@ reference_lists_init(struct drm_i915_private *i915, struct wa_lists *lists)
}
static void
-reference_lists_fini(struct drm_i915_private *i915, struct wa_lists *lists)
+reference_lists_fini(struct intel_gt *gt, struct wa_lists *lists)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
- for_each_engine(engine, i915, id)
+ for_each_engine(engine, gt, id)
intel_wa_list_free(&lists->engine[id].wa_list);
intel_wa_list_free(&lists->gt_wa_list);
@@ -191,10 +215,10 @@ static int check_whitelist(struct i915_gem_context *ctx,
err = 0;
i915_gem_object_lock(results);
- intel_wedge_on_timeout(&wedge, &ctx->i915->gt, HZ / 5) /* safety net! */
+ intel_wedge_on_timeout(&wedge, engine->gt, HZ / 5) /* safety net! */
err = i915_gem_object_set_to_cpu_domain(results, false);
i915_gem_object_unlock(results);
- if (intel_gt_is_wedged(&ctx->i915->gt))
+ if (intel_gt_is_wedged(engine->gt))
err = -EIO;
if (err)
goto out_put;
@@ -243,7 +267,6 @@ switch_to_scratch_context(struct intel_engine_cs *engine,
struct i915_gem_context *ctx;
struct intel_context *ce;
struct i915_request *rq;
- intel_wakeref_t wakeref;
int err = 0;
ctx = kernel_context(engine->i915);
@@ -255,9 +278,7 @@ switch_to_scratch_context(struct intel_engine_cs *engine,
ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
GEM_BUG_ON(IS_ERR(ce));
- rq = ERR_PTR(-ENODEV);
- with_intel_runtime_pm(&engine->i915->runtime_pm, wakeref)
- rq = igt_spinner_create_request(spin, ce, MI_NOOP);
+ rq = igt_spinner_create_request(spin, ce, MI_NOOP);
intel_context_put(ce);
@@ -267,13 +288,7 @@ switch_to_scratch_context(struct intel_engine_cs *engine,
goto err;
}
- i915_request_add(rq);
-
- if (spin && !igt_wait_for_spinner(spin, rq)) {
- pr_err("Spinner failed to start\n");
- err = -ETIMEDOUT;
- }
-
+ err = request_add_spin(rq, spin);
err:
if (err && spin)
igt_spinner_end(spin);
@@ -313,7 +328,7 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine,
if (err)
goto out_spin;
- with_intel_runtime_pm(&i915->runtime_pm, wakeref)
+ with_intel_runtime_pm(engine->uncore->rpm, wakeref)
err = reset(engine);
igt_spinner_end(&spin);
@@ -586,15 +601,11 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
goto err_request;
err_request:
- i915_request_add(rq);
- if (err)
- goto out_batch;
-
- if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+ err = request_add_sync(rq, err);
+ if (err) {
pr_err("%s: Futzing %x timedout; cancelling test\n",
engine->name, reg);
- intel_gt_set_wedged(&ctx->i915->gt);
- err = -EIO;
+ intel_gt_set_wedged(engine->gt);
goto out_batch;
}
@@ -693,34 +704,29 @@ out_scratch:
static int live_dirty_whitelist(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
struct i915_gem_context *ctx;
enum intel_engine_id id;
- intel_wakeref_t wakeref;
struct drm_file *file;
int err = 0;
/* Can the user write to the whitelisted registers? */
- if (INTEL_GEN(i915) < 7) /* minimum requirement for LRI, SRM, LRM */
+ if (INTEL_GEN(gt->i915) < 7) /* minimum requirement for LRI, SRM, LRM */
return 0;
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+ file = mock_file(gt->i915);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
- file = mock_file(i915);
- if (IS_ERR(file)) {
- err = PTR_ERR(file);
- goto out_rpm;
- }
-
- ctx = live_context(i915, file);
+ ctx = live_context(gt->i915, file);
if (IS_ERR(ctx)) {
err = PTR_ERR(ctx);
goto out_file;
}
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
if (engine->whitelist.count == 0)
continue;
@@ -730,43 +736,43 @@ static int live_dirty_whitelist(void *arg)
}
out_file:
- mock_file_free(i915, file);
-out_rpm:
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+ mock_file_free(gt->i915, file);
return err;
}
static int live_reset_whitelist(void *arg)
{
- struct drm_i915_private *i915 = arg;
- struct intel_engine_cs *engine = i915->engine[RCS0];
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
int err = 0;
/* If we reset the gpu, we should not lose the RING_NONPRIV */
+ igt_global_reset_lock(gt);
- if (!engine || engine->whitelist.count == 0)
- return 0;
-
- igt_global_reset_lock(&i915->gt);
+ for_each_engine(engine, gt, id) {
+ if (engine->whitelist.count == 0)
+ continue;
- if (intel_has_reset_engine(&i915->gt)) {
- err = check_whitelist_across_reset(engine,
- do_engine_reset,
- "engine");
- if (err)
- goto out;
- }
+ if (intel_has_reset_engine(gt)) {
+ err = check_whitelist_across_reset(engine,
+ do_engine_reset,
+ "engine");
+ if (err)
+ goto out;
+ }
- if (intel_has_gpu_reset(&i915->gt)) {
- err = check_whitelist_across_reset(engine,
- do_device_reset,
- "device");
- if (err)
- goto out;
+ if (intel_has_gpu_reset(gt)) {
+ err = check_whitelist_across_reset(engine,
+ do_device_reset,
+ "device");
+ if (err)
+ goto out;
+ }
}
out:
- igt_global_reset_unlock(&i915->gt);
+ igt_global_reset_unlock(gt);
return err;
}
@@ -782,6 +788,14 @@ static int read_whitelisted_registers(struct i915_gem_context *ctx,
if (IS_ERR(rq))
return PTR_ERR(rq);
+ i915_vma_lock(results);
+ err = i915_request_await_object(rq, results->obj, true);
+ if (err == 0)
+ err = i915_vma_move_to_active(results, rq, EXEC_OBJECT_WRITE);
+ i915_vma_unlock(results);
+ if (err)
+ goto err_req;
+
srm = MI_STORE_REGISTER_MEM;
if (INTEL_GEN(ctx->i915) >= 8)
srm++;
@@ -807,12 +821,7 @@ static int read_whitelisted_registers(struct i915_gem_context *ctx,
intel_ring_advance(rq, cs);
err_req:
- i915_request_add(rq);
-
- if (i915_request_wait(rq, 0, HZ / 5) < 0)
- err = -EIO;
-
- return err;
+ return request_add_sync(rq, err);
}
static int scrub_whitelisted_registers(struct i915_gem_context *ctx,
@@ -872,9 +881,7 @@ static int scrub_whitelisted_registers(struct i915_gem_context *ctx,
err = engine->emit_bb_start(rq, batch->node.start, 0, 0);
err_request:
- i915_request_add(rq);
- if (i915_request_wait(rq, 0, HZ / 5) < 0)
- err = -EIO;
+ err = request_add_sync(rq, err);
err_unpin:
i915_gem_object_unpin_map(batch->obj);
@@ -991,7 +998,7 @@ err_a:
static int live_isolated_whitelist(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct {
struct i915_gem_context *ctx;
struct i915_vma *scratch[2];
@@ -1005,17 +1012,14 @@ static int live_isolated_whitelist(void *arg)
* invisible to a second context.
*/
- if (!intel_engines_has_context_isolation(i915))
- return 0;
-
- if (!i915->kernel_context->vm)
+ if (!intel_engines_has_context_isolation(gt->i915))
return 0;
for (i = 0; i < ARRAY_SIZE(client); i++) {
struct i915_address_space *vm;
struct i915_gem_context *c;
- c = kernel_context(i915);
+ c = kernel_context(gt->i915);
if (IS_ERR(c)) {
err = PTR_ERR(c);
goto err;
@@ -1044,7 +1048,10 @@ static int live_isolated_whitelist(void *arg)
i915_vm_put(vm);
}
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
+ if (!engine->kernel_context->vm)
+ continue;
+
if (!whitelist_writable_count(engine))
continue;
@@ -1098,7 +1105,7 @@ err:
kernel_context_close(client[i].ctx);
}
- if (igt_flush_test(i915))
+ if (igt_flush_test(gt->i915))
err = -EIO;
return err;
@@ -1133,16 +1140,16 @@ verify_wa_lists(struct i915_gem_context *ctx, struct wa_lists *lists,
static int
live_gpu_reset_workarounds(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct i915_gem_context *ctx;
intel_wakeref_t wakeref;
struct wa_lists lists;
bool ok;
- if (!intel_has_gpu_reset(&i915->gt))
+ if (!intel_has_gpu_reset(gt))
return 0;
- ctx = kernel_context(i915);
+ ctx = kernel_context(gt->i915);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
@@ -1150,25 +1157,25 @@ live_gpu_reset_workarounds(void *arg)
pr_info("Verifying after GPU reset...\n");
- igt_global_reset_lock(&i915->gt);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+ igt_global_reset_lock(gt);
+ wakeref = intel_runtime_pm_get(gt->uncore->rpm);
- reference_lists_init(i915, &lists);
+ reference_lists_init(gt, &lists);
ok = verify_wa_lists(ctx, &lists, "before reset");
if (!ok)
goto out;
- intel_gt_reset(&i915->gt, ALL_ENGINES, "live_workarounds");
+ intel_gt_reset(gt, ALL_ENGINES, "live_workarounds");
ok = verify_wa_lists(ctx, &lists, "after reset");
out:
i915_gem_context_unlock_engines(ctx);
kernel_context_close(ctx);
- reference_lists_fini(i915, &lists);
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- igt_global_reset_unlock(&i915->gt);
+ reference_lists_fini(gt, &lists);
+ intel_runtime_pm_put(gt->uncore->rpm, wakeref);
+ igt_global_reset_unlock(gt);
return ok ? 0 : -ESRCH;
}
@@ -1176,7 +1183,7 @@ out:
static int
live_engine_reset_workarounds(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct i915_gem_engines_iter it;
struct i915_gem_context *ctx;
struct intel_context *ce;
@@ -1186,17 +1193,17 @@ live_engine_reset_workarounds(void *arg)
struct wa_lists lists;
int ret = 0;
- if (!intel_has_reset_engine(&i915->gt))
+ if (!intel_has_reset_engine(gt))
return 0;
- ctx = kernel_context(i915);
+ ctx = kernel_context(gt->i915);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
- igt_global_reset_lock(&i915->gt);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+ igt_global_reset_lock(gt);
+ wakeref = intel_runtime_pm_get(gt->uncore->rpm);
- reference_lists_init(i915, &lists);
+ reference_lists_init(gt, &lists);
for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
struct intel_engine_cs *engine = ce->engine;
@@ -1229,12 +1236,10 @@ live_engine_reset_workarounds(void *arg)
goto err;
}
- i915_request_add(rq);
-
- if (!igt_wait_for_spinner(&spin, rq)) {
+ ret = request_add_spin(rq, &spin);
+ if (ret) {
pr_err("Spinner failed to start\n");
igt_spinner_fini(&spin);
- ret = -ETIMEDOUT;
goto err;
}
@@ -1251,12 +1256,12 @@ live_engine_reset_workarounds(void *arg)
}
err:
i915_gem_context_unlock_engines(ctx);
- reference_lists_fini(i915, &lists);
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- igt_global_reset_unlock(&i915->gt);
+ reference_lists_fini(gt, &lists);
+ intel_runtime_pm_put(gt->uncore->rpm, wakeref);
+ igt_global_reset_unlock(gt);
kernel_context_close(ctx);
- igt_flush_test(i915);
+ igt_flush_test(gt->i915);
return ret;
}
@@ -1274,5 +1279,5 @@ int intel_workarounds_live_selftests(struct drm_i915_private *i915)
if (intel_gt_is_wedged(&i915->gt))
return 0;
- return i915_subtests(tests, i915);
+ return intel_gt_live_subtests(tests, &i915->gt);
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 249c747e9756..37f7bcbf7dac 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -9,6 +9,27 @@
#include "intel_guc_submission.h"
#include "i915_drv.h"
+/**
+ * DOC: GuC
+ *
+ * The GuC is a microcontroller inside the GT HW, introduced in gen9. The GuC is
+ * designed to offload some of the functionality usually performed by the host
+ * driver; currently the main operations it can take care of are:
+ *
+ * - Authentication of the HuC, which is required to fully enable HuC usage.
+ * - Low latency graphics context scheduling (a.k.a. GuC submission).
+ * - GT Power management.
+ *
+ * The enable_guc module parameter can be used to select which of those
+ * operations to enable within GuC. Note that not all the operations are
+ * supported on all gen9+ platforms.
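+ *
+ * For example, booting with i915.enable_guc=2 loads the GuC only in order to
+ * authenticate the HuC firmware (illustrative value; see the enable_guc
+ * modparam description in i915_params.c for the authoritative bit meanings).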
+ *
+ * Enabling the GuC is not mandatory and therefore the firmware is only loaded
+ * if at least one of the operations is selected. However, not loading the GuC
+ * might result in the loss of some features that do require the GuC (currently
+ * just the HuC, but more are expected to land in the future).
+ */
+
static void gen8_guc_raise_irq(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
@@ -548,9 +569,15 @@ int intel_guc_resume(struct intel_guc *guc)
}
/**
- * DOC: GuC Address Space
+ * DOC: GuC Memory Management
*
- * The layout of GuC address space is shown below:
+ * GuC can't allocate any memory for its own usage, so all the allocations must
+ * be handled by the host driver. GuC accesses the memory via the GGTT, with the
+ * exception of the top and bottom parts of the 4GB address space, which are
+ * instead re-mapped by the GuC HW to the memory location of the FW itself (WOPCM)
+ * or other parts of the HW. The driver must take care not to place objects that
+ * the GuC is going to access in these reserved ranges. The layout of the GuC
+ * address space is shown below:
*
* ::
*
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
index 36332064de9c..2cf2d3314f62 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
@@ -607,7 +607,6 @@ out_unlock:
void intel_guc_log_relay_flush(struct intel_guc_log *log)
{
struct intel_guc *guc = log_to_guc(log);
- struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
intel_wakeref_t wakeref;
/*
@@ -616,7 +615,7 @@ void intel_guc_log_relay_flush(struct intel_guc_log *log)
*/
flush_work(&log->relay.flush_work);
- with_intel_runtime_pm(&i915->runtime_pm, wakeref)
+ with_intel_runtime_pm(guc_to_gt(guc)->uncore->rpm, wakeref)
guc_action_flush_log(guc);
/* GuC would have updated log buffer by now, so capture it */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index f325d3dd564f..009e54a3764f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -29,6 +29,12 @@ enum {
/**
* DOC: GuC-based command submission
*
+ * IMPORTANT NOTE: GuC submission is currently not supported in i915. The GuC
+ * firmware is moving to an updated submission interface and we plan to
+ * turn submission back on when that lands. The below documentation (and related
+ * code) matches the old submission model and will be updated as part of the
+ * upgrade to the new flow.
+ *
* GuC client:
 * An intel_guc_client refers to a submission path through GuC. Currently, there
* is only one client, which is charged with all submissions to the GuC. This
@@ -1014,7 +1020,7 @@ static void guc_interrupts_capture(struct intel_gt *gt)
* to GuC
*/
irqs = _MASKED_BIT_ENABLE(GFX_INTERRUPT_STEERING);
- for_each_engine(engine, gt->i915, id)
+ for_each_engine(engine, gt, id)
ENGINE_WRITE(engine, RING_MODE_GEN7, irqs);
/* route USER_INTERRUPT to Host, all others are sent to GuC. */
@@ -1062,7 +1068,7 @@ static void guc_interrupts_release(struct intel_gt *gt)
*/
irqs = _MASKED_FIELD(GFX_FORWARD_VBLANK_MASK, GFX_FORWARD_VBLANK_NEVER);
irqs |= _MASKED_BIT_DISABLE(GFX_INTERRUPT_STEERING);
- for_each_engine(engine, gt->i915, id)
+ for_each_engine(engine, gt, id)
ENGINE_WRITE(engine, RING_MODE_GEN7, irqs);
/* route all GT interrupts to the host */
@@ -1145,7 +1151,7 @@ int intel_guc_submission_enable(struct intel_guc *guc)
/* Take over from manual control of ELSP (execlists) */
guc_interrupts_capture(gt);
- for_each_engine(engine, gt->i915, id) {
+ for_each_engine(engine, gt, id) {
engine->set_default_submission = guc_set_default_submission;
engine->set_default_submission(engine);
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
index d4625c97b4f9..8be515c8d0f0 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
@@ -9,6 +9,34 @@
#include "intel_huc.h"
#include "i915_drv.h"
+/**
+ * DOC: HuC
+ *
+ * The HuC is a dedicated microcontroller for usage in media HEVC (High
+ * Efficiency Video Coding) operations. Userspace can directly use the firmware
+ * capabilities by adding HuC specific commands to batch buffers.
+ *
+ * The kernel driver is only responsible for loading the HuC firmware and
+ * triggering its security authentication, which is performed by the GuC. For
+ * The GuC to correctly perform the authentication, the HuC binary must be
+ * loaded before the GuC one. Loading the HuC is optional; however, not using
+ * the HuC might negatively impact power usage and/or performance of media
+ * workloads, depending on the use-cases.
+ *
+ * See https://github.com/intel/media-driver for the latest details on HuC
+ * functionality.
+ */
+
+/**
+ * DOC: HuC Memory Management
+ *
+ * Similarly to the GuC, the HuC can't do any memory allocations on its own,
+ * with the difference being that the allocations for HuC usage are handled by
+ * the userspace driver instead of the kernel one. The HuC accesses the memory
+ * via the PPGTT belonging to the context loaded on the VCS executing the
+ * HuC-specific commands.
+ */
+
void intel_huc_init_early(struct intel_huc *huc)
{
struct drm_i915_private *i915 = huc_to_gt(huc)->i915;
@@ -118,10 +146,9 @@ void intel_huc_fini(struct intel_huc *huc)
*
* Called after HuC and GuC firmware loading during intel_uc_init_hw().
*
- * This function pins HuC firmware image object into GGTT.
- * Then it invokes GuC action to authenticate passing the offset to RSA
- * signature through intel_guc_auth_huc(). It then waits for 50ms for
- * firmware verification ACK and unpins the object.
+ * This function invokes the GuC action to authenticate the HuC firmware,
+ * passing the offset of the RSA signature to intel_guc_auth_huc(). It then
+ * waits for up to 50ms for firmware verification ACK.
*/
int intel_huc_auth(struct intel_huc *huc)
{
@@ -185,7 +212,7 @@ int intel_huc_check_status(struct intel_huc *huc)
if (!intel_huc_is_supported(huc))
return -ENODEV;
- with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
+ with_intel_runtime_pm(gt->uncore->rpm, wakeref)
status = intel_uncore_read(gt->uncore, huc->status.reg);
return (status & huc->status.mask) == huc->status.value;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
index 74602487ed67..d654340d4d03 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
@@ -8,21 +8,6 @@
#include "i915_drv.h"
/**
- * DOC: HuC Firmware
- *
- * Motivation:
- * GEN9 introduces a new dedicated firmware for usage in media HEVC (High
- * Efficiency Video Coding) operations. Userspace can use the firmware
- * capabilities by adding HuC specific commands to batch buffers.
- *
- * Implementation:
- * The same firmware loader is used as the GuC. However, the actual
- * loading to HW is deferred until GEM initialization is done.
- *
- * Note that HuC firmware loading must be done before GuC loading.
- */
-
-/**
* intel_huc_fw_init_early() - initializes HuC firmware struct
* @huc: intel_huc struct
*
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 29a9eec60d2e..3fdbc935d155 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -587,7 +587,7 @@ void intel_uc_suspend(struct intel_uc *uc)
if (!intel_guc_is_running(guc))
return;
- with_intel_runtime_pm(&uc_to_gt(uc)->i915->runtime_pm, wakeref)
+ with_intel_runtime_pm(uc_to_gt(uc)->uncore->rpm, wakeref)
intel_uc_runtime_suspend(uc);
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h
index f8f6c91a0df6..029214cdedd5 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h
@@ -39,9 +39,6 @@
* 3. Length info of each component can be found in header, in dwords.
* 4. Modulus and exponent key are not required by driver. They may not appear
* in fw. So driver will load a truncated firmware in this case.
- *
- * The only difference between GuC and HuC firmwares is how the version
- * information is saved.
*/
struct uc_css_header {
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
index f927f851aadf..d8a80388bd31 100644
--- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
@@ -108,22 +108,15 @@ static bool client_doorbell_in_sync(struct intel_guc_client *client)
* validating that the doorbells status expected by the driver matches what the
* GuC/HW have.
*/
-static int igt_guc_clients(void *args)
+static int igt_guc_clients(void *arg)
{
- struct drm_i915_private *dev_priv = args;
+ struct intel_gt *gt = arg;
+ struct intel_guc *guc = &gt->uc.guc;
intel_wakeref_t wakeref;
- struct intel_guc *guc;
int err = 0;
- GEM_BUG_ON(!HAS_GT_UC(dev_priv));
- wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
-
- guc = &dev_priv->gt.uc.guc;
- if (!guc) {
- pr_err("No guc object!\n");
- err = -EINVAL;
- goto unlock;
- }
+ GEM_BUG_ON(!HAS_GT_UC(gt->i915));
+ wakeref = intel_runtime_pm_get(gt->uncore->rpm);
err = check_all_doorbells(guc);
if (err)
@@ -188,7 +181,7 @@ out:
guc_clients_create(guc);
guc_clients_enable(guc);
unlock:
- intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
+ intel_runtime_pm_put(gt->uncore->rpm, wakeref);
return err;
}
@@ -199,21 +192,14 @@ unlock:
*/
static int igt_guc_doorbells(void *arg)
{
- struct drm_i915_private *dev_priv = arg;
+ struct intel_gt *gt = arg;
+ struct intel_guc *guc = &gt->uc.guc;
intel_wakeref_t wakeref;
- struct intel_guc *guc;
int i, err = 0;
u16 db_id;
- GEM_BUG_ON(!HAS_GT_UC(dev_priv));
- wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
-
- guc = &dev_priv->gt.uc.guc;
- if (!guc) {
- pr_err("No guc object!\n");
- err = -EINVAL;
- goto unlock;
- }
+ GEM_BUG_ON(!HAS_GT_UC(gt->i915));
+ wakeref = intel_runtime_pm_get(gt->uncore->rpm);
err = check_all_doorbells(guc);
if (err)
@@ -295,19 +281,19 @@ out:
guc_client_free(clients[i]);
}
unlock:
- intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
+ intel_runtime_pm_put(gt->uncore->rpm, wakeref);
return err;
}
-int intel_guc_live_selftest(struct drm_i915_private *dev_priv)
+int intel_guc_live_selftest(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(igt_guc_clients),
SUBTEST(igt_guc_doorbells),
};
- if (!USES_GUC_SUBMISSION(dev_priv))
+ if (!USES_GUC_SUBMISSION(i915))
return 0;
- return i915_subtests(tests, dev_priv);
+ return intel_gt_live_subtests(tests, &i915->gt);
}
diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c
index d996bbc7ea59..771420453f82 100644
--- a/drivers/gpu/drm/i915/gvt/aperture_gm.c
+++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c
@@ -198,7 +198,7 @@ static int alloc_vgpu_fence(struct intel_vgpu *vgpu)
mutex_lock(&dev_priv->ggtt.vm.mutex);
for (i = 0; i < vgpu_fence_sz(vgpu); i++) {
- reg = i915_reserve_fence(dev_priv);
+ reg = i915_reserve_fence(&dev_priv->ggtt);
if (IS_ERR(reg))
goto out_free_fence;
diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c
index f21b8fb5b37e..d6e7a1189bad 100644
--- a/drivers/gpu/drm/i915/gvt/execlist.c
+++ b/drivers/gpu/drm/i915/gvt/execlist.c
@@ -534,7 +534,7 @@ static void clean_execlist(struct intel_vgpu *vgpu,
struct intel_vgpu_submission *s = &vgpu->submission;
intel_engine_mask_t tmp;
- for_each_engine_masked(engine, dev_priv, engine_mask, tmp) {
+ for_each_engine_masked(engine, &dev_priv->gt, engine_mask, tmp) {
kfree(s->ring_scan_buffer[engine->id]);
s->ring_scan_buffer[engine->id] = NULL;
s->ring_scan_buffer_size[engine->id] = 0;
@@ -548,7 +548,7 @@ static void reset_execlist(struct intel_vgpu *vgpu,
struct intel_engine_cs *engine;
intel_engine_mask_t tmp;
- for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
+ for_each_engine_masked(engine, &dev_priv->gt, engine_mask, tmp)
init_vgpu_execlist(vgpu, engine->id);
}
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 6850f1f40241..36bb7639e82f 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -415,10 +415,9 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
{
struct intel_vgpu *vgpu = workload->vgpu;
struct intel_vgpu_submission *s = &vgpu->submission;
- struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
int ret;
- lockdep_assert_held(&dev_priv->drm.struct_mutex);
+ lockdep_assert_held(&vgpu->vgpu_lock);
if (workload->shadow)
return 0;
@@ -580,8 +579,6 @@ static void update_vreg_in_ctx(struct intel_vgpu_workload *workload)
static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload)
{
- struct intel_vgpu *vgpu = workload->vgpu;
- struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
struct intel_vgpu_shadow_bb *bb, *pos;
if (list_empty(&workload->shadow_bb))
@@ -590,8 +587,6 @@ static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload)
bb = list_first_entry(&workload->shadow_bb,
struct intel_vgpu_shadow_bb, list);
- mutex_lock(&dev_priv->drm.struct_mutex);
-
list_for_each_entry_safe(bb, pos, &workload->shadow_bb, list) {
if (bb->obj) {
if (bb->accessing)
@@ -609,8 +604,6 @@ static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload)
list_del(&bb->list);
kfree(bb);
}
-
- mutex_unlock(&dev_priv->drm.struct_mutex);
}
static int prepare_workload(struct intel_vgpu_workload *workload)
@@ -685,7 +678,6 @@ err_unpin_mm:
static int dispatch_workload(struct intel_vgpu_workload *workload)
{
struct intel_vgpu *vgpu = workload->vgpu;
- struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
struct i915_request *rq;
int ring_id = workload->ring_id;
int ret;
@@ -694,7 +686,6 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
ring_id, workload);
mutex_lock(&vgpu->vgpu_lock);
- mutex_lock(&dev_priv->drm.struct_mutex);
ret = intel_gvt_workload_req_alloc(workload);
if (ret)
@@ -729,7 +720,6 @@ out:
err_req:
if (ret)
workload->status = ret;
- mutex_unlock(&dev_priv->drm.struct_mutex);
mutex_unlock(&vgpu->vgpu_lock);
return ret;
}
@@ -887,7 +877,7 @@ void intel_vgpu_clean_workloads(struct intel_vgpu *vgpu,
intel_engine_mask_t tmp;
/* free the unsubmited workloads in the queues. */
- for_each_engine_masked(engine, dev_priv, engine_mask, tmp) {
+ for_each_engine_masked(engine, &dev_priv->gt, engine_mask, tmp) {
list_for_each_entry_safe(pos, n,
&s->workload_q_head[engine->id], list) {
list_del_init(&pos->list);
@@ -1594,9 +1584,9 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id,
*/
if (list_empty(workload_q_head(vgpu, ring_id))) {
intel_runtime_pm_get(&dev_priv->runtime_pm);
- mutex_lock(&dev_priv->drm.struct_mutex);
+ mutex_lock(&vgpu->vgpu_lock);
ret = intel_gvt_scan_and_shadow_workload(workload);
- mutex_unlock(&dev_priv->drm.struct_mutex);
+ mutex_unlock(&vgpu->vgpu_lock);
intel_runtime_pm_put_unchecked(&dev_priv->runtime_pm);
}
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index aa37c07004b9..7927b1a0c7a6 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -590,8 +590,8 @@ match:
int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
struct intel_engine_cs *engine)
{
- struct drm_i915_private *i915 = engine->i915;
intel_engine_mask_t tmp, mask = engine->mask;
+ struct intel_gt *gt = engine->gt;
struct llist_node *pos, *next;
int err;
@@ -603,7 +603,7 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
* We can then use the preallocated nodes in
* i915_active_acquire_barrier()
*/
- for_each_engine_masked(engine, i915, mask, tmp) {
+ for_each_engine_masked(engine, gt, mask, tmp) {
u64 idx = engine->kernel_context->timeline->fence_context;
struct active_node *node;
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 298a3e879e65..ada57eee914a 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1660,9 +1660,9 @@ static int i915_swizzle_info(struct seq_file *m, void *data)
wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
seq_printf(m, "bit6 swizzle for X-tiling = %s\n",
- swizzle_string(dev_priv->mm.bit_6_swizzle_x));
+ swizzle_string(dev_priv->ggtt.bit_6_swizzle_x));
seq_printf(m, "bit6 swizzle for Y-tiling = %s\n",
- swizzle_string(dev_priv->mm.bit_6_swizzle_y));
+ swizzle_string(dev_priv->ggtt.bit_6_swizzle_y));
if (IS_GEN_RANGE(dev_priv, 3, 4)) {
seq_printf(m, "DDC = 0x%08x\n",
@@ -2405,6 +2405,13 @@ static int i915_dmc_info(struct seq_file *m, void *unused)
if (INTEL_GEN(dev_priv) >= 12) {
dc5_reg = TGL_DMC_DEBUG_DC5_COUNT;
dc6_reg = TGL_DMC_DEBUG_DC6_COUNT;
+ /*
+ * NOTE: DMC_DEBUG3 is a general purpose reg.
+ * According to B.Specs:49196, the DMC f/w reuses the DC5/6 counter
+ * reg for DC3CO debugging and validation, but the TGL DMC f/w uses
+ * the DMC_DEBUG3 reg for the DC3CO counter.
+ */
+ seq_printf(m, "DC3CO count: %d\n", I915_READ(DMC_DEBUG3));
} else {
dc5_reg = IS_BROXTON(dev_priv) ? BXT_CSR_DC3_DC5_COUNT :
SKL_CSR_DC3_DC5_COUNT;
@@ -3583,6 +3590,37 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops,
i915_wedged_get, i915_wedged_set,
"%llu\n");
+static int
+i915_perf_noa_delay_set(void *data, u64 val)
+{
+ struct drm_i915_private *i915 = data;
+ const u32 clk = RUNTIME_INFO(i915)->cs_timestamp_frequency_khz;
+
+ /*
+ * This would lead to infinite waits as we're computing timestamp
+ * differences on the CS with only 32 bits.
+ */
+ if (val > mul_u32_u32(U32_MAX, clk))
+ return -EINVAL;
+
+ atomic64_set(&i915->perf.noa_programming_delay, val);
+ return 0;
+}
+
+static int
+i915_perf_noa_delay_get(void *data, u64 *val)
+{
+ struct drm_i915_private *i915 = data;
+
+ *val = atomic64_read(&i915->perf.noa_programming_delay);
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(i915_perf_noa_delay_fops,
+ i915_perf_noa_delay_get,
+ i915_perf_noa_delay_set,
+ "%llu\n");
+
#define DROP_UNBOUND BIT(0)
#define DROP_BOUND BIT(1)
#define DROP_RETIRE BIT(2)
@@ -3592,6 +3630,7 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops,
#define DROP_IDLE BIT(6)
#define DROP_RESET_ACTIVE BIT(7)
#define DROP_RESET_SEQNO BIT(8)
+#define DROP_RCU BIT(9)
#define DROP_ALL (DROP_UNBOUND | \
DROP_BOUND | \
DROP_RETIRE | \
@@ -3600,7 +3639,8 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops,
DROP_SHRINK_ALL |\
DROP_IDLE | \
DROP_RESET_ACTIVE | \
- DROP_RESET_SEQNO)
+ DROP_RESET_SEQNO | \
+ DROP_RCU)
static int
i915_drop_caches_get(void *data, u64 *val)
{
@@ -3652,6 +3692,9 @@ i915_drop_caches_set(void *data, u64 val)
i915_gem_shrink_all(i915);
fs_reclaim_release(GFP_KERNEL);
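+ /*
+ * DROP_RCU: wait for outstanding RCU callbacks (e.g. deferred frees)
+ * to complete.
+ */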
+ if (val & DROP_RCU)
+ rcu_barrier();
+
if (val & DROP_FREED)
i915_gem_drain_freed_objects(i915);
@@ -4333,6 +4376,7 @@ static const struct i915_debugfs_files {
const char *name;
const struct file_operations *fops;
} i915_debugfs_files[] = {
+ {"i915_perf_noa_delay", &i915_perf_noa_delay_fops},
{"i915_wedged", &i915_wedged_fops},
{"i915_cache_sharing", &i915_cache_sharing_fops},
{"i915_gem_drop_caches", &i915_drop_caches_fops},
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 9354924576c4..157ed22052a2 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -354,6 +354,8 @@ static void i915_driver_modeset_remove(struct drm_i915_private *i915)
{
intel_modeset_driver_remove(i915);
+ intel_irq_uninstall(i915);
+
intel_bios_driver_remove(i915);
i915_switcheroo_unregister(i915);
@@ -1073,8 +1075,8 @@ intel_get_dram_info(struct drm_i915_private *dev_priv)
static u32 gen9_edram_size_mb(struct drm_i915_private *dev_priv, u32 cap)
{
- const unsigned int ways[8] = { 4, 8, 12, 16, 16, 16, 16, 16 };
- const unsigned int sets[4] = { 1, 1, 2, 2 };
+ static const u8 ways[8] = { 4, 8, 12, 16, 16, 16, 16, 16 };
+ static const u8 sets[4] = { 1, 1, 2, 2 };
return EDRAM_NUM_BANKS(cap) *
ways[EDRAM_WAYS_IDX(cap)] *
@@ -1804,7 +1806,7 @@ static int i915_drm_resume(struct drm_device *dev)
DRM_ERROR("failed to re-enable GGTT\n");
i915_gem_restore_gtt_mappings(dev_priv);
- i915_gem_restore_fences(dev_priv);
+ i915_gem_restore_fences(&dev_priv->ggtt);
intel_csr_ucode_resume(dev_priv);
@@ -2502,7 +2504,7 @@ static int intel_runtime_suspend(struct device *kdev)
intel_gt_runtime_resume(&dev_priv->gt);
- i915_gem_restore_fences(dev_priv);
+ i915_gem_restore_fences(&dev_priv->ggtt);
enable_rpm_wakeref_asserts(rpm);
@@ -2582,7 +2584,7 @@ static int intel_runtime_resume(struct device *kdev)
* we can do is to hope that things will still work (and disable RPM).
*/
intel_gt_runtime_resume(&dev_priv->gt);
- i915_gem_restore_fences(dev_priv);
+ i915_gem_restore_fences(&dev_priv->ggtt);
/*
* On VLV/CHV display interrupts are part of the display
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 1da67b242113..8882c0908c3b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -85,6 +85,7 @@
#include "intel_device_info.h"
#include "intel_pch.h"
#include "intel_runtime_pm.h"
+#include "intel_memory_region.h"
#include "intel_uncore.h"
#include "intel_wakeref.h"
#include "intel_wopcm.h"
@@ -93,6 +94,7 @@
#include "i915_gem_fence_reg.h"
#include "i915_gem_gtt.h"
#include "i915_gpu_error.h"
+#include "i915_perf_types.h"
#include "i915_request.h"
#include "i915_scheduler.h"
#include "gt/intel_timeline.h"
@@ -106,8 +108,8 @@
#define DRIVER_NAME "i915"
#define DRIVER_DESC "Intel Graphics"
-#define DRIVER_DATE "20191007"
-#define DRIVER_TIMESTAMP 1570451087
+#define DRIVER_DATE "20191021"
+#define DRIVER_TIMESTAMP 1571651766
struct drm_i915_gem_object;
@@ -338,6 +340,7 @@ struct intel_csr {
i915_reg_t mmioaddr[20];
u32 mmiodata[20];
u32 dc_state;
+ u32 target_dc_state;
u32 allowed_dc_mask;
intel_wakeref_t wakeref;
};
@@ -500,6 +503,9 @@ struct i915_psr {
bool sink_not_reliable;
bool irq_aux_error;
u16 su_x_granularity;
+ bool dc3co_enabled;
+ u32 dc3co_exit_delay;
+ struct delayed_work idle_work;
};
#define QUIRK_LVDS_SSC_DISABLE (1<<1)
@@ -599,13 +605,8 @@ struct intel_rps {
struct intel_rps_ei ei;
};
-struct intel_llc_pstate {
- bool enabled;
-};
-
struct intel_gen6_power_mgmt {
struct intel_rps rps;
- struct intel_llc_pstate llc_pstate;
};
/* defined intel_pm.c */
@@ -678,6 +679,8 @@ struct i915_gem_mm {
*/
struct vfsmount *gemfs;
+ struct intel_memory_region *regions[INTEL_REGION_UNKNOWN];
+
struct notifier_block oom_notifier;
struct notifier_block vmap_notifier;
struct shrinker shrinker;
@@ -689,11 +692,6 @@ struct i915_gem_mm {
*/
struct workqueue_struct *userptr_wq;
- /** Bit 6 swizzling required for X tiling */
- u32 bit_6_swizzle_x;
- /** Bit 6 swizzling required for Y tiling */
- u32 bit_6_swizzle_y;
-
/* shrinker accounting, also useful for userland debugging */
u64 shrink_memory;
u32 shrink_count;
@@ -974,305 +972,6 @@ struct intel_wm_config {
bool sprites_scaled;
};
-struct i915_oa_format {
- u32 format;
- int size;
-};
-
-struct i915_oa_reg {
- i915_reg_t addr;
- u32 value;
-};
-
-struct i915_oa_config {
- char uuid[UUID_STRING_LEN + 1];
- int id;
-
- const struct i915_oa_reg *mux_regs;
- u32 mux_regs_len;
- const struct i915_oa_reg *b_counter_regs;
- u32 b_counter_regs_len;
- const struct i915_oa_reg *flex_regs;
- u32 flex_regs_len;
-
- struct attribute_group sysfs_metric;
- struct attribute *attrs[2];
- struct device_attribute sysfs_metric_id;
-
- atomic_t ref_count;
-};
-
-struct i915_perf_stream;
-
-/**
- * struct i915_perf_stream_ops - the OPs to support a specific stream type
- */
-struct i915_perf_stream_ops {
- /**
- * @enable: Enables the collection of HW samples, either in response to
- * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened
- * without `I915_PERF_FLAG_DISABLED`.
- */
- void (*enable)(struct i915_perf_stream *stream);
-
- /**
- * @disable: Disables the collection of HW samples, either in response
- * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying
- * the stream.
- */
- void (*disable)(struct i915_perf_stream *stream);
-
- /**
- * @poll_wait: Call poll_wait, passing a wait queue that will be woken
- * once there is something ready to read() for the stream
- */
- void (*poll_wait)(struct i915_perf_stream *stream,
- struct file *file,
- poll_table *wait);
-
- /**
- * @wait_unlocked: For handling a blocking read, wait until there is
- * something to ready to read() for the stream. E.g. wait on the same
- * wait queue that would be passed to poll_wait().
- */
- int (*wait_unlocked)(struct i915_perf_stream *stream);
-
- /**
- * @read: Copy buffered metrics as records to userspace
- * **buf**: the userspace, destination buffer
- * **count**: the number of bytes to copy, requested by userspace
- * **offset**: zero at the start of the read, updated as the read
- * proceeds, it represents how many bytes have been copied so far and
- * the buffer offset for copying the next record.
- *
- * Copy as many buffered i915 perf samples and records for this stream
- * to userspace as will fit in the given buffer.
- *
- * Only write complete records; returning -%ENOSPC if there isn't room
- * for a complete record.
- *
- * Return any error condition that results in a short read such as
- * -%ENOSPC or -%EFAULT, even though these may be squashed before
- * returning to userspace.
- */
- int (*read)(struct i915_perf_stream *stream,
- char __user *buf,
- size_t count,
- size_t *offset);
-
- /**
- * @destroy: Cleanup any stream specific resources.
- *
- * The stream will always be disabled before this is called.
- */
- void (*destroy)(struct i915_perf_stream *stream);
-};
-
-/**
- * struct i915_perf_stream - state for a single open stream FD
- */
-struct i915_perf_stream {
- /**
- * @dev_priv: i915 drm device
- */
- struct drm_i915_private *dev_priv;
-
- /**
- * @link: Links the stream into ``&drm_i915_private->streams``
- */
- struct list_head link;
-
- /**
- * @wakeref: As we keep the device awake while the perf stream is
- * active, we track our runtime pm reference for later release.
- */
- intel_wakeref_t wakeref;
-
- /**
- * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*`
- * properties given when opening a stream, representing the contents
- * of a single sample as read() by userspace.
- */
- u32 sample_flags;
-
- /**
- * @sample_size: Considering the configured contents of a sample
- * combined with the required header size, this is the total size
- * of a single sample record.
- */
- int sample_size;
-
- /**
- * @ctx: %NULL if measuring system-wide across all contexts or a
- * specific context that is being monitored.
- */
- struct i915_gem_context *ctx;
-
- /**
- * @enabled: Whether the stream is currently enabled, considering
- * whether the stream was opened in a disabled state and based
- * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls.
- */
- bool enabled;
-
- /**
- * @ops: The callbacks providing the implementation of this specific
- * type of configured stream.
- */
- const struct i915_perf_stream_ops *ops;
-
- /**
- * @oa_config: The OA configuration used by the stream.
- */
- struct i915_oa_config *oa_config;
-
- /**
- * @pinned_ctx: The OA context specific information.
- */
- struct intel_context *pinned_ctx;
- u32 specific_ctx_id;
- u32 specific_ctx_id_mask;
-
- struct hrtimer poll_check_timer;
- wait_queue_head_t poll_wq;
- bool pollin;
-
- bool periodic;
- int period_exponent;
-
- /**
- * @oa_buffer: State of the OA buffer.
- */
- struct {
- struct i915_vma *vma;
- u8 *vaddr;
- u32 last_ctx_id;
- int format;
- int format_size;
- int size_exponent;
-
- /**
- * @ptr_lock: Locks reads and writes to all head/tail state
- *
- * Consider: the head and tail pointer state needs to be read
- * consistently from a hrtimer callback (atomic context) and
- * read() fop (user context) with tail pointer updates happening
- * in atomic context and head updates in user context and the
- * (unlikely) possibility of read() errors needing to reset all
- * head/tail state.
- *
- * Note: Contention/performance aren't currently a significant
- * concern here considering the relatively low frequency of
- * hrtimer callbacks (5ms period) and that reads typically only
- * happen in response to a hrtimer event and likely complete
- * before the next callback.
- *
- * Note: This lock is not held *while* reading and copying data
- * to userspace so the value of head observed in htrimer
- * callbacks won't represent any partial consumption of data.
- */
- spinlock_t ptr_lock;
-
- /**
- * @tails: One 'aging' tail pointer and one 'aged' tail pointer ready to
- * used for reading.
- *
- * Initial values of 0xffffffff are invalid and imply that an
- * update is required (and should be ignored by an attempted
- * read)
- */
- struct {
- u32 offset;
- } tails[2];
-
- /**
- * @aged_tail_idx: Index for the aged tail ready to read() data up to.
- */
- unsigned int aged_tail_idx;
-
- /**
- * @aging_timestamp: A monotonic timestamp for when the current aging tail pointer
- * was read; used to determine when it is old enough to trust.
- */
- u64 aging_timestamp;
-
- /**
- * @head: Although we can always read back the head pointer register,
- * we prefer to avoid trusting the HW state, just to avoid any
- * risk that some hardware condition could * somehow bump the
- * head pointer unpredictably and cause us to forward the wrong
- * OA buffer data to userspace.
- */
- u32 head;
- } oa_buffer;
-};
-
-/**
- * struct i915_oa_ops - Gen specific implementation of an OA unit stream
- */
-struct i915_oa_ops {
- /**
- * @is_valid_b_counter_reg: Validates register's address for
- * programming boolean counters for a particular platform.
- */
- bool (*is_valid_b_counter_reg)(struct drm_i915_private *dev_priv,
- u32 addr);
-
- /**
- * @is_valid_mux_reg: Validates register's address for programming mux
- * for a particular platform.
- */
- bool (*is_valid_mux_reg)(struct drm_i915_private *dev_priv, u32 addr);
-
- /**
- * @is_valid_flex_reg: Validates register's address for programming
- * flex EU filtering for a particular platform.
- */
- bool (*is_valid_flex_reg)(struct drm_i915_private *dev_priv, u32 addr);
-
- /**
- * @enable_metric_set: Selects and applies any MUX configuration to set
- * up the Boolean and Custom (B/C) counters that are part of the
- * counter reports being sampled. May apply system constraints such as
- * disabling EU clock gating as required.
- */
- int (*enable_metric_set)(struct i915_perf_stream *stream);
-
- /**
- * @disable_metric_set: Remove system constraints associated with using
- * the OA unit.
- */
- void (*disable_metric_set)(struct i915_perf_stream *stream);
-
- /**
- * @oa_enable: Enable periodic sampling
- */
- void (*oa_enable)(struct i915_perf_stream *stream);
-
- /**
- * @oa_disable: Disable periodic sampling
- */
- void (*oa_disable)(struct i915_perf_stream *stream);
-
- /**
- * @read: Copy data from the circular OA buffer into a given userspace
- * buffer.
- */
- int (*read)(struct i915_perf_stream *stream,
- char __user *buf,
- size_t count,
- size_t *offset);
-
- /**
- * @oa_hw_tail_read: read the OA tail pointer register
- *
- * In particular this enables us to share all the fiddly code for
- * handling the OA unit tail pointer race that affects multiple
- * generations.
- */
- u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream);
-};
-
struct intel_cdclk_state {
unsigned int cdclk, vco, ref, bypass;
u8 voltage_level;
@@ -1560,6 +1259,8 @@ struct drm_i915_private {
I915_SAGV_NOT_CONTROLLED
} sagv_status;
+ u32 sagv_block_time_us;
+
struct {
/*
* Raw watermark latency values:
@@ -1630,61 +1331,7 @@ struct drm_i915_private {
struct intel_runtime_pm runtime_pm;
- struct {
- bool initialized;
-
- struct kobject *metrics_kobj;
- struct ctl_table_header *sysctl_header;
-
- /*
- * Lock associated with adding/modifying/removing OA configs
- * in dev_priv->perf.metrics_idr.
- */
- struct mutex metrics_lock;
-
- /*
- * List of dynamic configurations; you need to hold
- * dev_priv->perf.metrics_lock to access it.
- */
- struct idr metrics_idr;
-
- /*
- * Lock associated with anything below within this structure
- * except exclusive_stream.
- */
- struct mutex lock;
- struct list_head streams;
-
- /*
- * The stream currently using the OA unit. If accessed
- * outside a syscall associated with its file
- * descriptor, you need to hold
- * dev_priv->drm.struct_mutex.
- */
- struct i915_perf_stream *exclusive_stream;
-
- /**
- * For rate limiting any notifications of spurious
- * invalid OA reports
- */
- struct ratelimit_state spurious_report_rs;
-
- struct i915_oa_config test_config;
-
- u32 gen7_latched_oastatus1;
- u32 ctx_oactxctrl_offset;
- u32 ctx_flexeu0_offset;
-
- /**
- * The RPT_ID/reason field for Gen8+ includes a bit
- * to determine if the CTX ID in the report is valid
- * but the specific bit differs between Gen 8 and 9
- */
- u32 gen8_valid_ctx_bit;
-
- struct i915_oa_ops ops;
- const struct i915_oa_format *oa_formats;
- } perf;
+ struct i915_perf perf;
/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
struct intel_gt gt;
@@ -1765,10 +1412,10 @@ static inline struct drm_i915_private *pdev_to_i915(struct pci_dev *pdev)
for_each_if ((engine__) = (dev_priv__)->engine[(id__)])
/* Iterator over subset of engines selected by mask */
-#define for_each_engine_masked(engine__, dev_priv__, mask__, tmp__) \
- for ((tmp__) = (mask__) & INTEL_INFO(dev_priv__)->engine_mask; \
+#define for_each_engine_masked(engine__, gt__, mask__, tmp__) \
+ for ((tmp__) = (mask__) & INTEL_INFO((gt__)->i915)->engine_mask; \
(tmp__) ? \
- ((engine__) = (dev_priv__)->engine[__mask_next_bit(tmp__)]), 1 : \
+ ((engine__) = (gt__)->engine[__mask_next_bit(tmp__)]), 1 : \
0;)
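As an illustrative sketch of the reworked iterator (not part of this patch; the caller, engine selection and printout are assumptions), a user now passes a struct intel_gt rather than the i915 private:

	/* Hypothetical caller of the gt-based for_each_engine_masked(). */
	static void example_log_selected_engines(struct intel_gt *gt)
	{
		struct intel_engine_cs *engine;
		intel_engine_mask_t tmp;

		/* Only engines present in INTEL_INFO(gt->i915)->engine_mask are visited. */
		for_each_engine_masked(engine, gt, BIT(RCS0) | BIT(BCS0), tmp)
			pr_info("engine %s selected\n", engine->name);
	}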
#define rb_to_uabi_engine(rb) \
@@ -2031,6 +1678,11 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
#define IS_ICL_REVID(p, since, until) \
(IS_ICELAKE(p) && IS_REVID(p, since, until))
+#define TGL_REVID_A0 0x0
+
+#define IS_TGL_REVID(p, since, until) \
+ (IS_TIGERLAKE(p) && IS_REVID(p, since, until))
+
#define IS_LP(dev_priv) (INTEL_INFO(dev_priv)->is_lp)
#define IS_GEN9_LP(dev_priv) (IS_GEN(dev_priv, 9) && IS_LP(dev_priv))
#define IS_GEN9_BC(dev_priv) (IS_GEN(dev_priv, 9) && !IS_LP(dev_priv))
@@ -2128,6 +1780,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
#define HAS_IPC(dev_priv) (INTEL_INFO(dev_priv)->display.has_ipc)
+#define HAS_REGION(i915, i) (INTEL_INFO(i915)->memory_regions & (i))
+
#define HAS_GT_UC(dev_priv) (INTEL_INFO(dev_priv)->has_gt_uc)
/* Having GuC is not the same as using GuC */
@@ -2217,6 +1871,8 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv);
int i915_gem_freeze(struct drm_i915_private *dev_priv);
int i915_gem_freeze_late(struct drm_i915_private *dev_priv);
+struct intel_memory_region *i915_gem_shmem_setup(struct drm_i915_private *i915);
+
static inline void i915_gem_drain_freed_objects(struct drm_i915_private *i915)
{
/*
@@ -2346,6 +2002,9 @@ int __must_check i915_gem_evict_for_node(struct i915_address_space *vm,
unsigned int flags);
int i915_gem_evict_vm(struct i915_address_space *vm);
+void i915_gem_cleanup_memory_regions(struct drm_i915_private *i915);
+int i915_gem_init_memory_regions(struct drm_i915_private *i915);
+
/* i915_gem_internal.c */
struct drm_i915_gem_object *
i915_gem_object_create_internal(struct drm_i915_private *dev_priv,
@@ -2354,9 +2013,9 @@ i915_gem_object_create_internal(struct drm_i915_private *dev_priv,
/* i915_gem_tiling.c */
static inline bool i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
{
- struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
- return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
+ return i915->ggtt.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
i915_gem_object_is_tiled(obj);
}
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0ddbd3a5fb8d..dd0a3271b4e2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -45,7 +45,6 @@
#include "gem/i915_gem_context.h"
#include "gem/i915_gem_ioctls.h"
#include "gem/i915_gem_pm.h"
-#include "gem/i915_gemfs.h"
#include "gt/intel_engine_user.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
@@ -1111,15 +1110,6 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
if (err)
goto err_rq;
- /*
- * Failing to program the MOCS is non-fatal. The system will not
- * run at peak performance. So warn the user and carry on.
- */
- err = intel_mocs_emit(rq);
- if (err)
- dev_notice(i915->drm.dev,
- "Failed to program MOCS registers; expect performance issues.\n");
-
err = intel_renderstate_emit(rq);
if (err)
goto err_rq;
@@ -1369,7 +1359,7 @@ err_unlock:
/* Minimal basic recovery for KMS */
ret = i915_ggtt_enable_hw(dev_priv);
i915_gem_restore_gtt_mappings(dev_priv);
- i915_gem_restore_fences(dev_priv);
+ i915_gem_restore_fences(&dev_priv->ggtt);
intel_init_clock_gating(dev_priv);
}
@@ -1441,16 +1431,10 @@ static void i915_gem_init__mm(struct drm_i915_private *i915)
void i915_gem_init_early(struct drm_i915_private *dev_priv)
{
- int err;
-
i915_gem_init__mm(dev_priv);
i915_gem_init__pm(dev_priv);
spin_lock_init(&dev_priv->fb_tracking.lock);
-
- err = i915_gemfs_init(dev_priv);
- if (err)
- DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", err);
}
void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
@@ -1459,8 +1443,6 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));
GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
WARN_ON(dev_priv->mm.shrink_count);
-
- i915_gemfs_fini(dev_priv);
}
int i915_gem_freeze(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
index 167a7b56ed5b..2011f8e9a9f1 100644
--- a/drivers/gpu/drm/i915/i915_gem.h
+++ b/drivers/gpu/drm/i915/i915_gem.h
@@ -37,10 +37,8 @@ struct drm_i915_private;
#define GEM_SHOW_DEBUG() (drm_debug & DRM_UT_DRIVER)
#define GEM_BUG_ON(condition) do { if (unlikely((condition))) { \
- pr_err("%s:%d GEM_BUG_ON(%s)\n", \
- __func__, __LINE__, __stringify(condition)); \
- GEM_TRACE("%s:%d GEM_BUG_ON(%s)\n", \
- __func__, __LINE__, __stringify(condition)); \
+ GEM_TRACE_ERR("%s:%d GEM_BUG_ON(%s)\n", \
+ __func__, __LINE__, __stringify(condition)); \
BUG(); \
} \
} while(0)
@@ -66,17 +64,33 @@ struct drm_i915_private;
#if IS_ENABLED(CONFIG_DRM_I915_TRACE_GEM)
#define GEM_TRACE(...) trace_printk(__VA_ARGS__)
+#define GEM_TRACE_ERR(...) do { \
+ pr_err(__VA_ARGS__); \
+ trace_printk(__VA_ARGS__); \
+} while (0)
#define GEM_TRACE_DUMP() ftrace_dump(DUMP_ALL)
#define GEM_TRACE_DUMP_ON(expr) \
do { if (expr) ftrace_dump(DUMP_ALL); } while (0)
#else
#define GEM_TRACE(...) do { } while (0)
+#define GEM_TRACE_ERR(...) do { } while (0)
#define GEM_TRACE_DUMP() do { } while (0)
#define GEM_TRACE_DUMP_ON(expr) BUILD_BUG_ON_INVALID(expr)
#endif
#define I915_GEM_IDLE_TIMEOUT (HZ / 5)
+static inline void tasklet_lock(struct tasklet_struct *t)
+{
+ while (!tasklet_trylock(t))
+ cpu_relax();
+}
+
+static inline bool tasklet_is_locked(const struct tasklet_struct *t)
+{
+ return test_bit(TASKLET_STATE_RUN, &t->state);
+}
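A hedged usage sketch for the two helpers above (the caller and its purpose are illustrative assumptions): spinning on tasklet_trylock() synchronises against a callback that is currently running on another CPU.

	/* Illustrative only: wait for a currently running tasklet callback
	 * to finish by briefly taking its run lock.
	 */
	static void example_flush_tasklet(struct tasklet_struct *t)
	{
		if (tasklet_is_locked(t)) {
			tasklet_lock(t);	/* spins until the callback returns */
			tasklet_unlock(t);
		}
	}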
+
static inline void __tasklet_disable_sync_once(struct tasklet_struct *t)
{
if (!atomic_fetch_inc(&t->count))
@@ -98,4 +112,18 @@ static inline bool __tasklet_is_scheduled(struct tasklet_struct *t)
return test_bit(TASKLET_STATE_SCHED, &t->state);
}
+static inline void cancel_timer(struct timer_list *t)
+{
+ if (!READ_ONCE(t->expires))
+ return;
+
+ del_timer(t);
+ WRITE_ONCE(t->expires, 0);
+}
+
+static inline bool timer_expired(const struct timer_list *t)
+{
+ return READ_ONCE(t->expires) && !timer_pending(t);
+}
+
#endif /* __I915_GEM_H__ */
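Similarly, a minimal sketch of the new timer helpers (the function names and timeout handling are assumptions): cancel_timer() zeroes ->expires so that timer_expired() only reports timers that were armed and have since fired.

	static void example_arm(struct timer_list *t, unsigned long delay_jiffies)
	{
		mod_timer(t, jiffies + delay_jiffies);
	}

	static bool example_check_and_stop(struct timer_list *t)
	{
		if (!timer_expired(t))	/* never armed, cancelled, or still pending */
			return false;

		cancel_timer(t);	/* clears ->expires for the next round */
		return true;
	}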
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index 487b7261f7ed..321189e1b0f2 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -59,6 +59,16 @@
#define pipelined 0
+static struct drm_i915_private *fence_to_i915(struct i915_fence_reg *fence)
+{
+ return fence->ggtt->vm.i915;
+}
+
+static struct intel_uncore *fence_to_uncore(struct i915_fence_reg *fence)
+{
+ return fence->ggtt->vm.gt->uncore;
+}
+
static void i965_write_fence_reg(struct i915_fence_reg *fence,
struct i915_vma *vma)
{
@@ -66,7 +76,7 @@ static void i965_write_fence_reg(struct i915_fence_reg *fence,
int fence_pitch_shift;
u64 val;
- if (INTEL_GEN(fence->i915) >= 6) {
+ if (INTEL_GEN(fence_to_i915(fence)) >= 6) {
fence_reg_lo = FENCE_REG_GEN6_LO(fence->id);
fence_reg_hi = FENCE_REG_GEN6_HI(fence->id);
fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT;
@@ -95,7 +105,7 @@ static void i965_write_fence_reg(struct i915_fence_reg *fence,
}
if (!pipelined) {
- struct intel_uncore *uncore = &fence->i915->uncore;
+ struct intel_uncore *uncore = fence_to_uncore(fence);
/*
* To w/a incoherency with non-atomic 64-bit register updates,
@@ -132,7 +142,7 @@ static void i915_write_fence_reg(struct i915_fence_reg *fence,
GEM_BUG_ON(!is_power_of_2(vma->fence_size));
GEM_BUG_ON(!IS_ALIGNED(vma->node.start, vma->fence_size));
- if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence->i915))
+ if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence_to_i915(fence)))
stride /= 128;
else
stride /= 512;
@@ -148,7 +158,7 @@ static void i915_write_fence_reg(struct i915_fence_reg *fence,
}
if (!pipelined) {
- struct intel_uncore *uncore = &fence->i915->uncore;
+ struct intel_uncore *uncore = fence_to_uncore(fence);
i915_reg_t reg = FENCE_REG(fence->id);
intel_uncore_write_fw(uncore, reg, val);
@@ -180,7 +190,7 @@ static void i830_write_fence_reg(struct i915_fence_reg *fence,
}
if (!pipelined) {
- struct intel_uncore *uncore = &fence->i915->uncore;
+ struct intel_uncore *uncore = fence_to_uncore(fence);
i915_reg_t reg = FENCE_REG(fence->id);
intel_uncore_write_fw(uncore, reg, val);
@@ -191,15 +201,17 @@ static void i830_write_fence_reg(struct i915_fence_reg *fence,
static void fence_write(struct i915_fence_reg *fence,
struct i915_vma *vma)
{
+ struct drm_i915_private *i915 = fence_to_i915(fence);
+
/*
* Previous access through the fence register is marshalled by
* the mb() inside the fault handlers (i915_gem_release_mmaps)
* and explicitly managed for internal users.
*/
- if (IS_GEN(fence->i915, 2))
+ if (IS_GEN(i915, 2))
i830_write_fence_reg(fence, vma);
- else if (IS_GEN(fence->i915, 3))
+ else if (IS_GEN(i915, 3))
i915_write_fence_reg(fence, vma);
else
i965_write_fence_reg(fence, vma);
@@ -215,6 +227,8 @@ static void fence_write(struct i915_fence_reg *fence,
static int fence_update(struct i915_fence_reg *fence,
struct i915_vma *vma)
{
+ struct i915_ggtt *ggtt = fence->ggtt;
+ struct intel_uncore *uncore = fence_to_uncore(fence);
intel_wakeref_t wakeref;
struct i915_vma *old;
int ret;
@@ -256,7 +270,7 @@ static int fence_update(struct i915_fence_reg *fence,
old->fence = NULL;
}
- list_move(&fence->link, &fence->i915->ggtt.fence_list);
+ list_move(&fence->link, &ggtt->fence_list);
}
/*
@@ -269,7 +283,7 @@ static int fence_update(struct i915_fence_reg *fence,
* be cleared before we can use any other fences to ensure that
* the new fences do not overlap the elided clears, confusing HW.
*/
- wakeref = intel_runtime_pm_get_if_in_use(&fence->i915->runtime_pm);
+ wakeref = intel_runtime_pm_get_if_in_use(uncore->rpm);
if (!wakeref) {
GEM_BUG_ON(vma);
return 0;
@@ -280,10 +294,10 @@ static int fence_update(struct i915_fence_reg *fence,
if (vma) {
vma->fence = fence;
- list_move_tail(&fence->link, &fence->i915->ggtt.fence_list);
+ list_move_tail(&fence->link, &ggtt->fence_list);
}
- intel_runtime_pm_put(&fence->i915->runtime_pm, wakeref);
+ intel_runtime_pm_put(uncore->rpm, wakeref);
return 0;
}
@@ -312,11 +326,11 @@ int i915_vma_revoke_fence(struct i915_vma *vma)
return fence_update(fence, NULL);
}
-static struct i915_fence_reg *fence_find(struct drm_i915_private *i915)
+static struct i915_fence_reg *fence_find(struct i915_ggtt *ggtt)
{
struct i915_fence_reg *fence;
- list_for_each_entry(fence, &i915->ggtt.fence_list, link) {
+ list_for_each_entry(fence, &ggtt->fence_list, link) {
GEM_BUG_ON(fence->vma && fence->vma->fence != fence);
if (atomic_read(&fence->pin_count))
@@ -326,7 +340,7 @@ static struct i915_fence_reg *fence_find(struct drm_i915_private *i915)
}
/* Wait for completion of pending flips which consume fences */
- if (intel_has_pending_fb_unpin(i915))
+ if (intel_has_pending_fb_unpin(ggtt->vm.i915))
return ERR_PTR(-EAGAIN);
return ERR_PTR(-EDEADLK);
@@ -351,7 +365,7 @@ int __i915_vma_pin_fence(struct i915_vma *vma)
return 0;
}
} else if (set) {
- fence = fence_find(vma->vm->i915);
+ fence = fence_find(ggtt);
if (IS_ERR(fence))
return PTR_ERR(fence);
@@ -402,7 +416,7 @@ int i915_vma_pin_fence(struct i915_vma *vma)
* Note that we revoke fences on runtime suspend. Therefore the user
* must keep the device awake whilst using the fence.
*/
- assert_rpm_wakelock_held(&vma->vm->i915->runtime_pm);
+ assert_rpm_wakelock_held(vma->vm->gt->uncore->rpm);
GEM_BUG_ON(!i915_vma_is_pinned(vma));
GEM_BUG_ON(!i915_vma_is_ggtt(vma));
@@ -418,14 +432,13 @@ int i915_vma_pin_fence(struct i915_vma *vma)
/**
* i915_reserve_fence - Reserve a fence for vGPU
- * @i915: i915 device private
+ * @ggtt: Global GTT
*
* This function walks the fence regs looking for a free one and removes
* it from the fence_list. It is used to reserve a fence for vGPU to use.
*/
-struct i915_fence_reg *i915_reserve_fence(struct drm_i915_private *i915)
+struct i915_fence_reg *i915_reserve_fence(struct i915_ggtt *ggtt)
{
- struct i915_ggtt *ggtt = &i915->ggtt;
struct i915_fence_reg *fence;
int count;
int ret;
@@ -439,7 +452,7 @@ struct i915_fence_reg *i915_reserve_fence(struct drm_i915_private *i915)
if (count <= 1)
return ERR_PTR(-ENOSPC);
- fence = fence_find(i915);
+ fence = fence_find(ggtt);
if (IS_ERR(fence))
return fence;
@@ -463,7 +476,7 @@ struct i915_fence_reg *i915_reserve_fence(struct drm_i915_private *i915)
*/
void i915_unreserve_fence(struct i915_fence_reg *fence)
{
- struct i915_ggtt *ggtt = &fence->i915->ggtt;
+ struct i915_ggtt *ggtt = fence->ggtt;
lockdep_assert_held(&ggtt->vm.mutex);
@@ -472,19 +485,19 @@ void i915_unreserve_fence(struct i915_fence_reg *fence)
/**
* i915_gem_restore_fences - restore fence state
- * @i915: i915 device private
+ * @ggtt: Global GTT
*
* Restore the hw fence state to match the software tracking again, to be called
* after a gpu reset and on resume. Note that on runtime suspend we only cancel
* the fences, to be reacquired by the user later.
*/
-void i915_gem_restore_fences(struct drm_i915_private *i915)
+void i915_gem_restore_fences(struct i915_ggtt *ggtt)
{
int i;
rcu_read_lock(); /* keep obj alive as we dereference */
- for (i = 0; i < i915->ggtt.num_fences; i++) {
- struct i915_fence_reg *reg = &i915->ggtt.fence_regs[i];
+ for (i = 0; i < ggtt->num_fences; i++) {
+ struct i915_fence_reg *reg = &ggtt->fence_regs[i];
struct i915_vma *vma = READ_ONCE(reg->vma);
GEM_BUG_ON(vma && vma->fence != reg);
@@ -550,15 +563,16 @@ void i915_gem_restore_fences(struct drm_i915_private *i915)
*/
/**
- * i915_gem_detect_bit_6_swizzle - detect bit 6 swizzling pattern
- * @i915: i915 device private
+ * detect_bit_6_swizzle - detect bit 6 swizzling pattern
+ * @ggtt: Global GTT
*
* Detects bit 6 swizzling of address lookup between IGD access and CPU
* access through main memory.
*/
-static void detect_bit_6_swizzle(struct drm_i915_private *i915)
+static void detect_bit_6_swizzle(struct i915_ggtt *ggtt)
{
- struct intel_uncore *uncore = &i915->uncore;
+ struct intel_uncore *uncore = ggtt->vm.gt->uncore;
+ struct drm_i915_private *i915 = ggtt->vm.i915;
u32 swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
u32 swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
@@ -720,8 +734,8 @@ static void detect_bit_6_swizzle(struct drm_i915_private *i915)
swizzle_y = I915_BIT_6_SWIZZLE_NONE;
}
- i915->mm.bit_6_swizzle_x = swizzle_x;
- i915->mm.bit_6_swizzle_y = swizzle_y;
+ i915->ggtt.bit_6_swizzle_x = swizzle_x;
+ i915->ggtt.bit_6_swizzle_y = swizzle_y;
}
/*
@@ -822,14 +836,15 @@ i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj,
void i915_ggtt_init_fences(struct i915_ggtt *ggtt)
{
struct drm_i915_private *i915 = ggtt->vm.i915;
+ struct intel_uncore *uncore = ggtt->vm.gt->uncore;
int num_fences;
int i;
INIT_LIST_HEAD(&ggtt->fence_list);
INIT_LIST_HEAD(&ggtt->userfault_list);
- intel_wakeref_auto_init(&ggtt->userfault_wakeref, &i915->runtime_pm);
+ intel_wakeref_auto_init(&ggtt->userfault_wakeref, uncore->rpm);
- detect_bit_6_swizzle(i915);
+ detect_bit_6_swizzle(ggtt);
if (INTEL_GEN(i915) >= 7 &&
!(IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)))
@@ -842,20 +857,20 @@ void i915_ggtt_init_fences(struct i915_ggtt *ggtt)
num_fences = 8;
if (intel_vgpu_active(i915))
- num_fences = intel_uncore_read(&i915->uncore,
+ num_fences = intel_uncore_read(uncore,
vgtif_reg(avail_rs.fence_num));
/* Initialize fence registers to zero */
for (i = 0; i < num_fences; i++) {
struct i915_fence_reg *fence = &ggtt->fence_regs[i];
- fence->i915 = i915;
+ fence->ggtt = ggtt;
fence->id = i;
list_add_tail(&fence->link, &ggtt->fence_list);
}
ggtt->num_fences = num_fences;
- i915_gem_restore_fences(i915);
+ i915_gem_restore_fences(ggtt);
}
void intel_gt_init_swizzling(struct intel_gt *gt)
@@ -864,7 +879,7 @@ void intel_gt_init_swizzling(struct intel_gt *gt)
struct intel_uncore *uncore = gt->uncore;
if (INTEL_GEN(i915) < 5 ||
- i915->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
+ i915->ggtt.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
return;
intel_uncore_rmw(uncore, DISP_ARB_CTL, 0, DISP_TILE_SURFACE_SWIZZLING);
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.h b/drivers/gpu/drm/i915/i915_gem_fence_reg.h
index 99866fb9d94f..7bd521cd7cd7 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.h
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.h
@@ -29,7 +29,6 @@
#include <linux/types.h>
struct drm_i915_gem_object;
-struct drm_i915_private;
struct i915_ggtt;
struct i915_vma;
struct intel_gt;
@@ -39,7 +38,7 @@ struct sg_table;
struct i915_fence_reg {
struct list_head link;
- struct drm_i915_private *i915;
+ struct i915_ggtt *ggtt;
struct i915_vma *vma;
atomic_t pin_count;
int id;
@@ -55,10 +54,10 @@ struct i915_fence_reg {
};
/* i915_gem_fence_reg.c */
-struct i915_fence_reg *i915_reserve_fence(struct drm_i915_private *i915);
+struct i915_fence_reg *i915_reserve_fence(struct i915_ggtt *ggtt);
void i915_unreserve_fence(struct i915_fence_reg *fence);
-void i915_gem_restore_fences(struct drm_i915_private *i915);
+void i915_gem_restore_fences(struct i915_ggtt *ggtt);
void i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj,
struct sg_table *pages);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 7b15bb891970..3148d5946b63 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1569,7 +1569,7 @@ static void gen7_ppgtt_enable(struct intel_gt *gt)
}
intel_uncore_write(uncore, GAM_ECOCHK, ecochk);
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
/* GFX_MODE is per-ring on gen7+ */
ENGINE_WRITE(engine,
RING_MODE_GEN7,
@@ -2744,6 +2744,59 @@ int i915_init_ggtt(struct drm_i915_private *i915)
return 0;
}
+void i915_gem_cleanup_memory_regions(struct drm_i915_private *i915)
+{
+ int i;
+
+ for (i = 0; i < INTEL_REGION_UNKNOWN; i++) {
+ struct intel_memory_region *region = i915->mm.regions[i];
+
+ if (region)
+ intel_memory_region_put(region);
+ }
+}
+
+int i915_gem_init_memory_regions(struct drm_i915_private *i915)
+{
+ int err, i;
+
+ for (i = 0; i < INTEL_REGION_UNKNOWN; i++) {
+ struct intel_memory_region *mem = ERR_PTR(-ENODEV);
+ u32 type;
+
+ if (!HAS_REGION(i915, BIT(i)))
+ continue;
+
+ type = MEMORY_TYPE_FROM_REGION(intel_region_map[i]);
+ switch (type) {
+ case INTEL_MEMORY_SYSTEM:
+ mem = i915_gem_shmem_setup(i915);
+ break;
+ case INTEL_MEMORY_STOLEN:
+ mem = i915_gem_stolen_setup(i915);
+ break;
+ }
+
+ if (IS_ERR(mem)) {
+ err = PTR_ERR(mem);
+ DRM_ERROR("Failed to setup region(%d) type=%d\n", err, type);
+ goto out_cleanup;
+ }
+
+ mem->id = intel_region_map[i];
+ mem->type = type;
+ mem->instance = MEMORY_INSTANCE_FROM_REGION(intel_region_map[i]);
+
+ i915->mm.regions[i] = mem;
+ }
+
+ return 0;
+
+out_cleanup:
+ i915_gem_cleanup_memory_regions(i915);
+ return err;
+}
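A hedged lookup sketch for the table populated above; INTEL_REGION_SMEM and REGION_SMEM come from intel_memory_region.h elsewhere in this series and are assumptions as far as this hunk is concerned:

	/* Illustrative only: fetch the system-memory region, if the device
	 * advertises one in INTEL_INFO()->memory_regions.
	 */
	static struct intel_memory_region *
	example_get_smem(struct drm_i915_private *i915)
	{
		if (!HAS_REGION(i915, REGION_SMEM))
			return NULL;

		return i915->mm.regions[INTEL_REGION_SMEM];
	}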
+
static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
{
struct i915_vma *vma, *vn;
@@ -2781,6 +2834,8 @@ void i915_ggtt_driver_release(struct drm_i915_private *i915)
{
struct pagevec *pvec;
+ i915_gem_cleanup_memory_regions(i915);
+
fini_aliasing_ppgtt(&i915->ggtt);
ggtt_cleanup_hw(&i915->ggtt);
@@ -2790,8 +2845,6 @@ void i915_ggtt_driver_release(struct drm_i915_private *i915)
set_pages_array_wb(pvec->pages, pvec->nr);
__pagevec_release(pvec);
}
-
- i915_gem_cleanup_stolen(i915);
}
static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
@@ -3240,11 +3293,7 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
if (ret)
return ret;
- /*
- * Initialise stolen early so that we may reserve preallocated
- * objects for the BIOS to KMS transition.
- */
- ret = i915_gem_init_stolen(dev_priv);
+ ret = i915_gem_init_memory_regions(dev_priv);
if (ret)
goto out_gtt_cleanup;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 0a18fdfe63ff..f074f1de66e8 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -411,6 +411,11 @@ struct i915_ggtt {
int mtrr;
+ /** Bit 6 swizzling required for X tiling */
+ u32 bit_6_swizzle_x;
+ /** Bit 6 swizzling required for Y tiling */
+ u32 bit_6_swizzle_y;
+
u32 pin_bias;
unsigned int num_fences;
diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
index f4b3cbb1adce..ad33fbe90a28 100644
--- a/drivers/gpu/drm/i915/i915_getparam.c
+++ b/drivers/gpu/drm/i915/i915_getparam.c
@@ -5,6 +5,7 @@
#include "gt/intel_engine_user.h"
#include "i915_drv.h"
+#include "i915_perf.h"
int i915_getparam_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv)
@@ -156,6 +157,9 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
case I915_PARAM_MMAP_GTT_COHERENT:
value = INTEL_INFO(i915)->has_coherent_ggtt;
break;
+ case I915_PARAM_PERF_REVISION:
+ value = i915_perf_ioctl_version();
+ break;
default:
DRM_DEBUG("Unknown parameter %d\n", param->param);
return -EINVAL;
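From userspace, the new parameter can be probed with a plain GETPARAM ioctl; the sketch below is illustrative (the helper name and error handling are assumptions), and on kernels without this patch the call fails with EINVAL:

	#include <sys/ioctl.h>
	#include <drm/i915_drm.h>

	static int example_query_perf_revision(int drm_fd)
	{
		int value = 0;
		struct drm_i915_getparam gp = {
			.param = I915_PARAM_PERF_REVISION,
			.value = &value,
		};

		if (ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp))
			return -1;	/* parameter unknown on older kernels */

		return value;	/* current i915-perf uapi revision */
	}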
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index f2371b6083c6..572a5c37cc61 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -412,7 +412,7 @@ void gen9_reset_guc_interrupts(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
- assert_rpm_wakelock_held(&gt->i915->runtime_pm);
+ assert_rpm_wakelock_held(gt->uncore->rpm);
spin_lock_irq(&gt->irq_lock);
gen6_gt_pm_reset_iir(gt, gt->pm_guc_events);
@@ -423,7 +423,7 @@ void gen9_enable_guc_interrupts(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
- assert_rpm_wakelock_held(&gt->i915->runtime_pm);
+ assert_rpm_wakelock_held(gt->uncore->rpm);
spin_lock_irq(&gt->irq_lock);
if (!guc->interrupts.enabled) {
@@ -440,7 +440,7 @@ void gen9_disable_guc_interrupts(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
- assert_rpm_wakelock_held(&gt->i915->runtime_pm);
+ assert_rpm_wakelock_held(gt->uncore->rpm);
spin_lock_irq(&gt->irq_lock);
guc->interrupts.enabled = false;
@@ -2248,11 +2248,19 @@ static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir)
tc_hotplug_trigger = pch_iir & SDE_TC_MASK_TGP;
tc_port_hotplug_long_detect = tgp_tc_port_hotplug_long_detect;
pins = hpd_tgp;
- } else if (HAS_PCH_MCC(dev_priv)) {
+ } else if (HAS_PCH_JSP(dev_priv)) {
ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_TGP;
tc_hotplug_trigger = 0;
+ pins = hpd_tgp;
+ } else if (HAS_PCH_MCC(dev_priv)) {
+ ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP;
+ tc_hotplug_trigger = pch_iir & SDE_TC_HOTPLUG_ICP(PORT_TC1);
+ tc_port_hotplug_long_detect = icp_tc_port_hotplug_long_detect;
pins = hpd_icp;
} else {
+ WARN(!HAS_PCH_ICP(dev_priv),
+ "Unrecognized PCH type 0x%x\n", INTEL_PCH_TYPE(dev_priv));
+
ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP;
tc_hotplug_trigger = pch_iir & SDE_TC_MASK_ICP;
tc_port_hotplug_long_detect = icp_tc_port_hotplug_long_detect;
@@ -3377,9 +3385,22 @@ static void icp_hpd_irq_setup(struct drm_i915_private *dev_priv,
static void mcc_hpd_irq_setup(struct drm_i915_private *dev_priv)
{
icp_hpd_irq_setup(dev_priv,
+ SDE_DDI_MASK_ICP, SDE_TC_HOTPLUG_ICP(PORT_TC1),
+ ICP_DDI_HPD_ENABLE_MASK, ICP_TC_HPD_ENABLE(PORT_TC1),
+ hpd_icp);
+}
+
+/*
+ * JSP behaves exactly the same as MCC above except that port C is mapped to
+ * the DDI-C pins instead of the TC1 pins. This means we should follow TGP's
+ * masks & tables rather than ICP's masks & tables.
+ */
+static void jsp_hpd_irq_setup(struct drm_i915_private *dev_priv)
+{
+ icp_hpd_irq_setup(dev_priv,
SDE_DDI_MASK_TGP, 0,
TGP_DDI_HPD_ENABLE_MASK, 0,
- hpd_icp);
+ hpd_tgp);
}
static void gen11_hpd_detection_setup(struct drm_i915_private *dev_priv)
@@ -3782,8 +3803,11 @@ static void icp_irq_postinstall(struct drm_i915_private *dev_priv)
if (HAS_PCH_TGP(dev_priv))
icp_hpd_detection_setup(dev_priv, TGP_DDI_HPD_ENABLE_MASK,
TGP_TC_HPD_ENABLE_MASK);
- else if (HAS_PCH_MCC(dev_priv))
+ else if (HAS_PCH_JSP(dev_priv))
icp_hpd_detection_setup(dev_priv, TGP_DDI_HPD_ENABLE_MASK, 0);
+ else if (HAS_PCH_MCC(dev_priv))
+ icp_hpd_detection_setup(dev_priv, ICP_DDI_HPD_ENABLE_MASK,
+ ICP_TC_HPD_ENABLE(PORT_TC1));
else
icp_hpd_detection_setup(dev_priv, ICP_DDI_HPD_ENABLE_MASK,
ICP_TC_HPD_ENABLE_MASK);
@@ -4313,7 +4337,9 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
if (I915_HAS_HOTPLUG(dev_priv))
dev_priv->display.hpd_irq_setup = i915_hpd_irq_setup;
} else {
- if (HAS_PCH_MCC(dev_priv))
+ if (HAS_PCH_JSP(dev_priv))
+ dev_priv->display.hpd_irq_setup = jsp_hpd_irq_setup;
+ else if (HAS_PCH_MCC(dev_priv))
dev_priv->display.hpd_irq_setup = mcc_hpd_irq_setup;
else if (INTEL_GEN(dev_priv) >= 11)
dev_priv->display.hpd_irq_setup = gen11_hpd_irq_setup;
@@ -4460,10 +4486,10 @@ void intel_irq_uninstall(struct drm_i915_private *dev_priv)
int irq = dev_priv->drm.pdev->irq;
/*
- * FIXME we can get called twice during driver load
- * error handling due to intel_modeset_cleanup()
- * calling us out of sequence. Would be nice if
- * it didn't do that...
+ * FIXME we can get called twice during driver probe
+ * error handling as well as during driver remove due to
+ * intel_modeset_driver_remove() calling us out of sequence.
+ * Would be nice if it didn't do that...
*/
if (!dev_priv->drm.irq_enabled)
return;
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 296452f9efe4..4f1806f65040 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -46,7 +46,8 @@ i915_param_named(modeset, int, 0400,
i915_param_named_unsafe(enable_dc, int, 0400,
"Enable power-saving display C-states. "
- "(-1=auto [default]; 0=disable; 1=up to DC5; 2=up to DC6)");
+ "(-1=auto [default]; 0=disable; 1=up to DC5; 2=up to DC6; "
+ "3=up to DC5 with DC3CO; 4=up to DC6 with DC3CO)");
i915_param_named_unsafe(enable_fbc, int, 0600,
"Enable frame buffer compression for power savings "
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 1cbf3998b361..f9a3bfe68689 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -151,6 +151,9 @@
#define GEN_DEFAULT_PAGE_SIZES \
.page_sizes = I915_GTT_PAGE_SIZE_4K
+#define GEN_DEFAULT_REGIONS \
+ .memory_regions = REGION_SMEM | REGION_STOLEN
+
#define I830_FEATURES \
GEN(2), \
.is_mobile = 1, \
@@ -168,7 +171,8 @@
I9XX_PIPE_OFFSETS, \
I9XX_CURSOR_OFFSETS, \
I9XX_COLORS, \
- GEN_DEFAULT_PAGE_SIZES
+ GEN_DEFAULT_PAGE_SIZES, \
+ GEN_DEFAULT_REGIONS
#define I845_FEATURES \
GEN(2), \
@@ -185,7 +189,8 @@
I845_PIPE_OFFSETS, \
I845_CURSOR_OFFSETS, \
I9XX_COLORS, \
- GEN_DEFAULT_PAGE_SIZES
+ GEN_DEFAULT_PAGE_SIZES, \
+ GEN_DEFAULT_REGIONS
static const struct intel_device_info intel_i830_info = {
I830_FEATURES,
@@ -219,7 +224,8 @@ static const struct intel_device_info intel_i865g_info = {
I9XX_PIPE_OFFSETS, \
I9XX_CURSOR_OFFSETS, \
I9XX_COLORS, \
- GEN_DEFAULT_PAGE_SIZES
+ GEN_DEFAULT_PAGE_SIZES, \
+ GEN_DEFAULT_REGIONS
static const struct intel_device_info intel_i915g_info = {
GEN3_FEATURES,
@@ -304,7 +310,8 @@ static const struct intel_device_info intel_pineview_m_info = {
I9XX_PIPE_OFFSETS, \
I9XX_CURSOR_OFFSETS, \
I965_COLORS, \
- GEN_DEFAULT_PAGE_SIZES
+ GEN_DEFAULT_PAGE_SIZES, \
+ GEN_DEFAULT_REGIONS
static const struct intel_device_info intel_i965g_info = {
GEN4_FEATURES,
@@ -354,7 +361,8 @@ static const struct intel_device_info intel_gm45_info = {
I9XX_PIPE_OFFSETS, \
I9XX_CURSOR_OFFSETS, \
ILK_COLORS, \
- GEN_DEFAULT_PAGE_SIZES
+ GEN_DEFAULT_PAGE_SIZES, \
+ GEN_DEFAULT_REGIONS
static const struct intel_device_info intel_ironlake_d_info = {
GEN5_FEATURES,
@@ -384,7 +392,8 @@ static const struct intel_device_info intel_ironlake_m_info = {
I9XX_PIPE_OFFSETS, \
I9XX_CURSOR_OFFSETS, \
ILK_COLORS, \
- GEN_DEFAULT_PAGE_SIZES
+ GEN_DEFAULT_PAGE_SIZES, \
+ GEN_DEFAULT_REGIONS
#define SNB_D_PLATFORM \
GEN6_FEATURES, \
@@ -432,7 +441,8 @@ static const struct intel_device_info intel_sandybridge_m_gt2_info = {
IVB_PIPE_OFFSETS, \
IVB_CURSOR_OFFSETS, \
IVB_COLORS, \
- GEN_DEFAULT_PAGE_SIZES
+ GEN_DEFAULT_PAGE_SIZES, \
+ GEN_DEFAULT_REGIONS
#define IVB_D_PLATFORM \
GEN7_FEATURES, \
@@ -493,6 +503,7 @@ static const struct intel_device_info intel_valleyview_info = {
I9XX_CURSOR_OFFSETS,
I965_COLORS,
GEN_DEFAULT_PAGE_SIZES,
+ GEN_DEFAULT_REGIONS,
};
#define G75_FEATURES \
@@ -587,6 +598,7 @@ static const struct intel_device_info intel_cherryview_info = {
CHV_CURSOR_OFFSETS,
CHV_COLORS,
GEN_DEFAULT_PAGE_SIZES,
+ GEN_DEFAULT_REGIONS,
};
#define GEN9_DEFAULT_PAGE_SIZES \
@@ -661,7 +673,8 @@ static const struct intel_device_info intel_skylake_gt4_info = {
HSW_PIPE_OFFSETS, \
IVB_CURSOR_OFFSETS, \
IVB_COLORS, \
- GEN9_DEFAULT_PAGE_SIZES
+ GEN9_DEFAULT_PAGE_SIZES, \
+ GEN_DEFAULT_REGIONS
static const struct intel_device_info intel_broxton_info = {
GEN9_LP_FEATURES,
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 231388d06c82..d2ac51fe4f04 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -196,7 +196,9 @@
#include <linux/uuid.h>
#include "gem/i915_gem_context.h"
-#include "gem/i915_gem_pm.h"
+#include "gt/intel_engine_pm.h"
+#include "gt/intel_engine_user.h"
+#include "gt/intel_gt.h"
#include "gt/intel_lrc_reg.h"
#include "i915_drv.h"
@@ -342,11 +344,14 @@ static const struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = {
* struct perf_open_properties - for validated properties given to open a stream
* @sample_flags: `DRM_I915_PERF_PROP_SAMPLE_*` properties are tracked as flags
* @single_context: Whether a single or all gpu contexts should be monitored
+ * @hold_preemption: Whether preemption is disabled for the filtered
+ * context
* @ctx_handle: A gem ctx handle for use with @single_context
* @metrics_set: An ID for an OA unit metric set advertised via sysfs
* @oa_format: An OA unit HW report format
* @oa_periodic: Whether to enable periodic OA unit sampling
* @oa_period_exponent: The OA unit sampling period is derived from this
+ * @engine: The engine (typically rcs0) being monitored by the OA unit
*
* As read_properties_unlocked() enumerates and validates the properties given
* to open a stream of metrics the configuration is built up in the structure
@@ -356,6 +361,7 @@ struct perf_open_properties {
u32 sample_flags;
u64 single_context:1;
+ u64 hold_preemption:1;
u64 ctx_handle;
/* OA sampling state */
@@ -363,69 +369,66 @@ struct perf_open_properties {
int oa_format;
bool oa_periodic;
int oa_period_exponent;
+
+ struct intel_engine_cs *engine;
+};
+
+struct i915_oa_config_bo {
+ struct llist_node node;
+
+ struct i915_oa_config *oa_config;
+ struct i915_vma *vma;
};
static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer);
-static void free_oa_config(struct drm_i915_private *dev_priv,
- struct i915_oa_config *oa_config)
+void i915_oa_config_release(struct kref *ref)
{
- if (!PTR_ERR(oa_config->flex_regs))
- kfree(oa_config->flex_regs);
- if (!PTR_ERR(oa_config->b_counter_regs))
- kfree(oa_config->b_counter_regs);
- if (!PTR_ERR(oa_config->mux_regs))
- kfree(oa_config->mux_regs);
- kfree(oa_config);
-}
+ struct i915_oa_config *oa_config =
+ container_of(ref, typeof(*oa_config), ref);
-static void put_oa_config(struct drm_i915_private *dev_priv,
- struct i915_oa_config *oa_config)
-{
- if (!atomic_dec_and_test(&oa_config->ref_count))
- return;
+ kfree(oa_config->flex_regs);
+ kfree(oa_config->b_counter_regs);
+ kfree(oa_config->mux_regs);
- free_oa_config(dev_priv, oa_config);
+ kfree_rcu(oa_config, rcu);
}
-static int get_oa_config(struct drm_i915_private *dev_priv,
- int metrics_set,
- struct i915_oa_config **out_config)
+struct i915_oa_config *
+i915_perf_get_oa_config(struct i915_perf *perf, int metrics_set)
{
- int ret;
-
- if (metrics_set == 1) {
- *out_config = &dev_priv->perf.test_config;
- atomic_inc(&dev_priv->perf.test_config.ref_count);
- return 0;
- }
-
- ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock);
- if (ret)
- return ret;
+ struct i915_oa_config *oa_config;
- *out_config = idr_find(&dev_priv->perf.metrics_idr, metrics_set);
- if (!*out_config)
- ret = -EINVAL;
+ rcu_read_lock();
+ if (metrics_set == 1)
+ oa_config = &perf->test_config;
else
- atomic_inc(&(*out_config)->ref_count);
+ oa_config = idr_find(&perf->metrics_idr, metrics_set);
+ if (oa_config)
+ oa_config = i915_oa_config_get(oa_config);
+ rcu_read_unlock();
- mutex_unlock(&dev_priv->perf.metrics_lock);
+ return oa_config;
+}
- return ret;
+static void free_oa_config_bo(struct i915_oa_config_bo *oa_bo)
+{
+ i915_oa_config_put(oa_bo->oa_config);
+ i915_vma_put(oa_bo->vma);
+ kfree(oa_bo);
}
static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream)
{
- struct drm_i915_private *dev_priv = stream->dev_priv;
+ struct intel_uncore *uncore = stream->uncore;
- return I915_READ(GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK;
+ return intel_uncore_read(uncore, GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK;
}
static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream)
{
- struct drm_i915_private *dev_priv = stream->dev_priv;
- u32 oastatus1 = I915_READ(GEN7_OASTATUS1);
+ struct intel_uncore *uncore = stream->uncore;
+ u32 oastatus1 = intel_uncore_read(uncore, GEN7_OASTATUS1);
return oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
}
@@ -456,7 +459,6 @@ static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream)
*/
static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
{
- struct drm_i915_private *dev_priv = stream->dev_priv;
int report_size = stream->oa_buffer.format_size;
unsigned long flags;
unsigned int aged_idx;
@@ -479,7 +481,7 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
aged_tail = stream->oa_buffer.tails[aged_idx].offset;
aging_tail = stream->oa_buffer.tails[!aged_idx].offset;
- hw_tail = dev_priv->perf.ops.oa_hw_tail_read(stream);
+ hw_tail = stream->perf->ops.oa_hw_tail_read(stream);
/* The tail pointer increases in 64 byte increments,
* not in report_size steps...
@@ -655,7 +657,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
size_t count,
size_t *offset)
{
- struct drm_i915_private *dev_priv = stream->dev_priv;
+ struct intel_uncore *uncore = stream->uncore;
int report_size = stream->oa_buffer.format_size;
u8 *oa_buf_base = stream->oa_buffer.vaddr;
u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
@@ -740,7 +742,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
reason = ((report32[0] >> OAREPORT_REASON_SHIFT) &
OAREPORT_REASON_MASK);
if (reason == 0) {
- if (__ratelimit(&dev_priv->perf.spurious_report_rs))
+ if (__ratelimit(&stream->perf->spurious_report_rs))
DRM_NOTE("Skipping spurious, invalid OA report\n");
continue;
}
@@ -755,7 +757,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
* Note: that we don't clear the valid_ctx_bit so userspace can
* understand that the ID has been squashed by the kernel.
*/
- if (!(report32[0] & dev_priv->perf.gen8_valid_ctx_bit))
+ if (!(report32[0] & stream->perf->gen8_valid_ctx_bit))
ctx_id = report32[2] = INVALID_CTX_ID;
/*
@@ -789,7 +791,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
* switches since it's not-uncommon for periodic samples to
* identify a switch before any 'context switch' report.
*/
- if (!dev_priv->perf.exclusive_stream->ctx ||
+ if (!stream->perf->exclusive_stream->ctx ||
stream->specific_ctx_id == ctx_id ||
stream->oa_buffer.last_ctx_id == stream->specific_ctx_id ||
reason & OAREPORT_REASON_CTX_SWITCH) {
@@ -798,7 +800,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
* While filtering for a single context we avoid
* leaking the IDs of other contexts.
*/
- if (dev_priv->perf.exclusive_stream->ctx &&
+ if (stream->perf->exclusive_stream->ctx &&
stream->specific_ctx_id != ctx_id) {
report32[2] = INVALID_CTX_ID;
}
@@ -830,7 +832,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
*/
head += gtt_offset;
- I915_WRITE(GEN8_OAHEADPTR, head & GEN8_OAHEADPTR_MASK);
+ intel_uncore_write(uncore, GEN8_OAHEADPTR,
+ head & GEN8_OAHEADPTR_MASK);
stream->oa_buffer.head = head;
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
@@ -864,14 +867,14 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
size_t count,
size_t *offset)
{
- struct drm_i915_private *dev_priv = stream->dev_priv;
+ struct intel_uncore *uncore = stream->uncore;
u32 oastatus;
int ret;
if (WARN_ON(!stream->oa_buffer.vaddr))
return -EIO;
- oastatus = I915_READ(GEN8_OASTATUS);
+ oastatus = intel_uncore_read(uncore, GEN8_OASTATUS);
/*
* We treat OABUFFER_OVERFLOW as a significant error:
@@ -896,14 +899,14 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
stream->period_exponent);
- dev_priv->perf.ops.oa_disable(stream);
- dev_priv->perf.ops.oa_enable(stream);
+ stream->perf->ops.oa_disable(stream);
+ stream->perf->ops.oa_enable(stream);
/*
* Note: .oa_enable() is expected to re-init the oabuffer and
* reset GEN8_OASTATUS for us
*/
- oastatus = I915_READ(GEN8_OASTATUS);
+ oastatus = intel_uncore_read(uncore, GEN8_OASTATUS);
}
if (oastatus & GEN8_OASTATUS_REPORT_LOST) {
@@ -911,8 +914,8 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
DRM_I915_PERF_RECORD_OA_REPORT_LOST);
if (ret)
return ret;
- I915_WRITE(GEN8_OASTATUS,
- oastatus & ~GEN8_OASTATUS_REPORT_LOST);
+ intel_uncore_write(uncore, GEN8_OASTATUS,
+ oastatus & ~GEN8_OASTATUS_REPORT_LOST);
}
return gen8_append_oa_reports(stream, buf, count, offset);
@@ -943,7 +946,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
size_t count,
size_t *offset)
{
- struct drm_i915_private *dev_priv = stream->dev_priv;
+ struct intel_uncore *uncore = stream->uncore;
int report_size = stream->oa_buffer.format_size;
u8 *oa_buf_base = stream->oa_buffer.vaddr;
u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
@@ -1017,7 +1020,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
* copying it to userspace...
*/
if (report32[0] == 0) {
- if (__ratelimit(&dev_priv->perf.spurious_report_rs))
+ if (__ratelimit(&stream->perf->spurious_report_rs))
DRM_NOTE("Skipping spurious, invalid OA report\n");
continue;
}
@@ -1043,9 +1046,9 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
*/
head += gtt_offset;
- I915_WRITE(GEN7_OASTATUS2,
- ((head & GEN7_OASTATUS2_HEAD_MASK) |
- GEN7_OASTATUS2_MEM_SELECT_GGTT));
+ intel_uncore_write(uncore, GEN7_OASTATUS2,
+ (head & GEN7_OASTATUS2_HEAD_MASK) |
+ GEN7_OASTATUS2_MEM_SELECT_GGTT);
stream->oa_buffer.head = head;
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
@@ -1075,21 +1078,21 @@ static int gen7_oa_read(struct i915_perf_stream *stream,
size_t count,
size_t *offset)
{
- struct drm_i915_private *dev_priv = stream->dev_priv;
+ struct intel_uncore *uncore = stream->uncore;
u32 oastatus1;
int ret;
if (WARN_ON(!stream->oa_buffer.vaddr))
return -EIO;
- oastatus1 = I915_READ(GEN7_OASTATUS1);
+ oastatus1 = intel_uncore_read(uncore, GEN7_OASTATUS1);
/* XXX: On Haswell we don't have a safe way to clear oastatus1
* bits while the OA unit is enabled (while the tail pointer
* may be updated asynchronously) so we ignore status bits
* that have already been reported to userspace.
*/
- oastatus1 &= ~dev_priv->perf.gen7_latched_oastatus1;
+ oastatus1 &= ~stream->perf->gen7_latched_oastatus1;
/* We treat OABUFFER_OVERFLOW as a significant error:
*
@@ -1120,10 +1123,10 @@ static int gen7_oa_read(struct i915_perf_stream *stream,
DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
stream->period_exponent);
- dev_priv->perf.ops.oa_disable(stream);
- dev_priv->perf.ops.oa_enable(stream);
+ stream->perf->ops.oa_disable(stream);
+ stream->perf->ops.oa_enable(stream);
- oastatus1 = I915_READ(GEN7_OASTATUS1);
+ oastatus1 = intel_uncore_read(uncore, GEN7_OASTATUS1);
}
if (unlikely(oastatus1 & GEN7_OASTATUS1_REPORT_LOST)) {
@@ -1131,7 +1134,7 @@ static int gen7_oa_read(struct i915_perf_stream *stream,
DRM_I915_PERF_RECORD_OA_REPORT_LOST);
if (ret)
return ret;
- dev_priv->perf.gen7_latched_oastatus1 |=
+ stream->perf->gen7_latched_oastatus1 |=
GEN7_OASTATUS1_REPORT_LOST;
}
@@ -1196,9 +1199,7 @@ static int i915_oa_read(struct i915_perf_stream *stream,
size_t count,
size_t *offset)
{
- struct drm_i915_private *dev_priv = stream->dev_priv;
-
- return dev_priv->perf.ops.read(stream, buf, count, offset);
+ return stream->perf->ops.read(stream, buf, count, offset);
}
static struct intel_context *oa_pin_context(struct i915_perf_stream *stream)
@@ -1209,7 +1210,7 @@ static struct intel_context *oa_pin_context(struct i915_perf_stream *stream)
int err;
for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
- if (ce->engine->class != RENDER_CLASS)
+ if (ce->engine != stream->engine) /* first match! */
continue;
/*
@@ -1239,14 +1240,13 @@ static struct intel_context *oa_pin_context(struct i915_perf_stream *stream)
*/
static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
{
- struct drm_i915_private *i915 = stream->dev_priv;
struct intel_context *ce;
ce = oa_pin_context(stream);
if (IS_ERR(ce))
return PTR_ERR(ce);
- switch (INTEL_GEN(i915)) {
+ switch (INTEL_GEN(ce->engine->i915)) {
case 7: {
/*
* On Haswell we don't do any post processing of the reports
@@ -1260,7 +1260,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
case 8:
case 9:
case 10:
- if (USES_GUC_SUBMISSION(i915)) {
+ if (USES_GUC_SUBMISSION(ce->engine->i915)) {
/*
* When using GuC, the context descriptor we write in
* i915 is read by GuC and rewritten before it's
@@ -1296,7 +1296,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
}
default:
- MISSING_CASE(INTEL_GEN(i915));
+ MISSING_CASE(INTEL_GEN(ce->engine->i915));
}
ce->tag = stream->specific_ctx_id_mask;
@@ -1338,40 +1338,55 @@ free_oa_buffer(struct i915_perf_stream *stream)
stream->oa_buffer.vaddr = NULL;
}
+static void
+free_oa_configs(struct i915_perf_stream *stream)
+{
+ struct i915_oa_config_bo *oa_bo, *tmp;
+
+ i915_oa_config_put(stream->oa_config);
+ llist_for_each_entry_safe(oa_bo, tmp, stream->oa_config_bos.first, node)
+ free_oa_config_bo(oa_bo);
+}
+
+static void
+free_noa_wait(struct i915_perf_stream *stream)
+{
+ i915_vma_unpin_and_release(&stream->noa_wait, 0);
+}
+
static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
{
- struct drm_i915_private *dev_priv = stream->dev_priv;
+ struct i915_perf *perf = stream->perf;
- BUG_ON(stream != dev_priv->perf.exclusive_stream);
+ BUG_ON(stream != perf->exclusive_stream);
/*
* Unset exclusive_stream first, it will be checked while disabling
* the metric set on gen8+.
*/
- mutex_lock(&dev_priv->drm.struct_mutex);
- dev_priv->perf.exclusive_stream = NULL;
- dev_priv->perf.ops.disable_metric_set(stream);
- mutex_unlock(&dev_priv->drm.struct_mutex);
+ perf->exclusive_stream = NULL;
+ perf->ops.disable_metric_set(stream);
free_oa_buffer(stream);
- intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
- intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref);
+ intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL);
+ intel_engine_pm_put(stream->engine);
if (stream->ctx)
oa_put_render_ctx_id(stream);
- put_oa_config(dev_priv, stream->oa_config);
+ free_oa_configs(stream);
+ free_noa_wait(stream);
- if (dev_priv->perf.spurious_report_rs.missed) {
+ if (perf->spurious_report_rs.missed) {
DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
- dev_priv->perf.spurious_report_rs.missed);
+ perf->spurious_report_rs.missed);
}
}
static void gen7_init_oa_buffer(struct i915_perf_stream *stream)
{
- struct drm_i915_private *dev_priv = stream->dev_priv;
+ struct intel_uncore *uncore = stream->uncore;
u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
unsigned long flags;
@@ -1380,13 +1395,14 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream)
/* Pre-DevBDW: OABUFFER must be set with counters off,
* before OASTATUS1, but after OASTATUS2
*/
- I915_WRITE(GEN7_OASTATUS2,
- gtt_offset | GEN7_OASTATUS2_MEM_SELECT_GGTT); /* head */
+ intel_uncore_write(uncore, GEN7_OASTATUS2, /* head */
+ gtt_offset | GEN7_OASTATUS2_MEM_SELECT_GGTT);
stream->oa_buffer.head = gtt_offset;
- I915_WRITE(GEN7_OABUFFER, gtt_offset);
+ intel_uncore_write(uncore, GEN7_OABUFFER, gtt_offset);
- I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */
+ intel_uncore_write(uncore, GEN7_OASTATUS1, /* tail */
+ gtt_offset | OABUFFER_SIZE_16M);
/* Mark that we need updated tail pointers to read from... */
stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
@@ -1398,7 +1414,7 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream)
* already seen since they can't be cleared while periodic
* sampling is enabled.
*/
- dev_priv->perf.gen7_latched_oastatus1 = 0;
+ stream->perf->gen7_latched_oastatus1 = 0;
/* NB: although the OA buffer will initially be allocated
* zeroed via shmfs (and so this memset is redundant when
@@ -1413,25 +1429,22 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream)
*/
memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
- /* Maybe make ->pollin per-stream state if we support multiple
- * concurrent streams in the future.
- */
stream->pollin = false;
}
static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
{
- struct drm_i915_private *dev_priv = stream->dev_priv;
+ struct intel_uncore *uncore = stream->uncore;
u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
unsigned long flags;
spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
- I915_WRITE(GEN8_OASTATUS, 0);
- I915_WRITE(GEN8_OAHEADPTR, gtt_offset);
+ intel_uncore_write(uncore, GEN8_OASTATUS, 0);
+ intel_uncore_write(uncore, GEN8_OAHEADPTR, gtt_offset);
stream->oa_buffer.head = gtt_offset;
- I915_WRITE(GEN8_OABUFFER_UDW, 0);
+ intel_uncore_write(uncore, GEN8_OABUFFER_UDW, 0);
/*
* PRM says:
@@ -1441,9 +1454,9 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
* to enable proper functionality of the overflow
* bit."
*/
- I915_WRITE(GEN8_OABUFFER, gtt_offset |
+ intel_uncore_write(uncore, GEN8_OABUFFER, gtt_offset |
OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT);
- I915_WRITE(GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK);
+ intel_uncore_write(uncore, GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK);
/* Mark that we need updated tail pointers to read from... */
stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
@@ -1472,17 +1485,12 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
*/
memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
- /*
- * Maybe make ->pollin per-stream state if we support multiple
- * concurrent streams in the future.
- */
stream->pollin = false;
}
static int alloc_oa_buffer(struct i915_perf_stream *stream)
{
struct drm_i915_gem_object *bo;
- struct drm_i915_private *dev_priv = stream->dev_priv;
struct i915_vma *vma;
int ret;
@@ -1492,7 +1500,7 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream)
BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE);
BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M);
- bo = i915_gem_object_create_shmem(dev_priv, OA_BUFFER_SIZE);
+ bo = i915_gem_object_create_shmem(stream->perf->i915, OA_BUFFER_SIZE);
if (IS_ERR(bo)) {
DRM_ERROR("Failed to allocate OA buffer\n");
return PTR_ERR(bo);
@@ -1515,10 +1523,6 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream)
goto err_unpin;
}
- DRM_DEBUG_DRIVER("OA Buffer initialized, gtt offset = 0x%x, vaddr = %p\n",
- i915_ggtt_offset(stream->oa_buffer.vma),
- stream->oa_buffer.vaddr);
-
return 0;
err_unpin:
@@ -1533,50 +1537,381 @@ err_unref:
return ret;
}
-static void config_oa_regs(struct drm_i915_private *dev_priv,
- const struct i915_oa_reg *regs,
- u32 n_regs)
+static u32 *save_restore_register(struct i915_perf_stream *stream, u32 *cs,
+ bool save, i915_reg_t reg, u32 offset,
+ u32 dword_count)
+{
+ u32 cmd;
+ u32 d;
+
+ cmd = save ? MI_STORE_REGISTER_MEM : MI_LOAD_REGISTER_MEM;
+ if (INTEL_GEN(stream->perf->i915) >= 8)
+ cmd++;
+
+ for (d = 0; d < dword_count; d++) {
+ *cs++ = cmd;
+ *cs++ = i915_mmio_reg_offset(reg) + 4 * d;
+ *cs++ = intel_gt_scratch_offset(stream->engine->gt,
+ offset) + 4 * d;
+ *cs++ = 0;
+ }
+
+ return cs;
+}
+
+static int alloc_noa_wait(struct i915_perf_stream *stream)
+{
+ struct drm_i915_private *i915 = stream->perf->i915;
+ struct drm_i915_gem_object *bo;
+ struct i915_vma *vma;
+ const u64 delay_ticks = 0xffffffffffffffff -
+ DIV64_U64_ROUND_UP(
+ atomic64_read(&stream->perf->noa_programming_delay) *
+ RUNTIME_INFO(i915)->cs_timestamp_frequency_khz,
+ 1000000ull);
+ const u32 base = stream->engine->mmio_base;
+#define CS_GPR(x) GEN8_RING_CS_GPR(base, x)
+ u32 *batch, *ts0, *cs, *jump;
+ int ret, i;
+ enum {
+ START_TS,
+ NOW_TS,
+ DELTA_TS,
+ JUMP_PREDICATE,
+ DELTA_TARGET,
+ N_CS_GPR
+ };
+
+ bo = i915_gem_object_create_internal(i915, 4096);
+ if (IS_ERR(bo)) {
+ DRM_ERROR("Failed to allocate NOA wait batchbuffer\n");
+ return PTR_ERR(bo);
+ }
+
+ /*
+ * We pin the buffer in the GGTT because multiple OA config BOs will
+ * jump to this address, so it needs to stay fixed for the lifetime of
+ * the i915/perf stream.
+ */
+ vma = i915_gem_object_ggtt_pin(bo, NULL, 0, 0, PIN_HIGH);
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
+ goto err_unref;
+ }
+
+ batch = cs = i915_gem_object_pin_map(bo, I915_MAP_WB);
+ if (IS_ERR(batch)) {
+ ret = PTR_ERR(batch);
+ goto err_unpin;
+ }
+
+ /* Save registers. */
+ for (i = 0; i < N_CS_GPR; i++)
+ cs = save_restore_register(
+ stream, cs, true /* save */, CS_GPR(i),
+ INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2);
+ cs = save_restore_register(
+ stream, cs, true /* save */, MI_PREDICATE_RESULT_1,
+ INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1);
+
+ /* First timestamp snapshot location. */
+ ts0 = cs;
+
+ /*
+ * Initial snapshot of the timestamp register to implement the wait.
+ * We work with 32-bit values, so clear out the top 32 bits of the
+ * register because the ALU operates on 64 bits.
+ */
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ *cs++ = i915_mmio_reg_offset(CS_GPR(START_TS)) + 4;
+ *cs++ = 0;
+ *cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
+ *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(base));
+ *cs++ = i915_mmio_reg_offset(CS_GPR(START_TS));
+
+ /*
+ * This is the location we're going to jump back into until the
+ * required amount of time has passed.
+ */
+ jump = cs;
+
+ /*
+ * Take another snapshot of the timestamp register. Take care to clear
+ * the top 32 bits of CS_GPR(1) as we're using it for other
+ * operations below.
+ */
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ *cs++ = i915_mmio_reg_offset(CS_GPR(NOW_TS)) + 4;
+ *cs++ = 0;
+ *cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
+ *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(base));
+ *cs++ = i915_mmio_reg_offset(CS_GPR(NOW_TS));
+
+ /*
+ * Do a diff between the 2 timestamps and store the result back into
+ * CS_GPR(1).
+ */
+ *cs++ = MI_MATH(5);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
+ *cs++ = MI_MATH_SUB;
+ *cs++ = MI_MATH_STORE(MI_MATH_REG(DELTA_TS), MI_MATH_REG_ACCU);
+ *cs++ = MI_MATH_STORE(MI_MATH_REG(JUMP_PREDICATE), MI_MATH_REG_CF);
+
+ /*
+ * Transfer the carry flag (set to 1 if ts1 < ts0, meaning the
+ * timestamp has rolled over the 32 bits) into the predicate register
+ * to be used for the predicated jump.
+ */
+ *cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
+ *cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE));
+ *cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1);
+
+ /* Restart from the beginning if we had timestamps roll over. */
+ *cs++ = (INTEL_GEN(i915) < 8 ?
+ MI_BATCH_BUFFER_START :
+ MI_BATCH_BUFFER_START_GEN8) |
+ MI_BATCH_PREDICATE;
+ *cs++ = i915_ggtt_offset(vma) + (ts0 - batch) * 4;
+ *cs++ = 0;
+
+ /*
+ * Now take the diff between the two previous timestamps and add it to:
+ * ((1 << 64) - 1) - delay_ns
+ *
+ * When the Carry Flag contains 1 this means the elapsed time is
+ * longer than the expected delay, and we can exit the wait loop.
+ */
+ *cs++ = MI_LOAD_REGISTER_IMM(2);
+ *cs++ = i915_mmio_reg_offset(CS_GPR(DELTA_TARGET));
+ *cs++ = lower_32_bits(delay_ticks);
+ *cs++ = i915_mmio_reg_offset(CS_GPR(DELTA_TARGET)) + 4;
+ *cs++ = upper_32_bits(delay_ticks);
+
+ *cs++ = MI_MATH(4);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(DELTA_TS));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(DELTA_TARGET));
+ *cs++ = MI_MATH_ADD;
+ *cs++ = MI_MATH_STOREINV(MI_MATH_REG(JUMP_PREDICATE), MI_MATH_REG_CF);
+
+ /*
+ * Transfer the result into the predicate register to be used for the
+ * predicated jump.
+ */
+ *cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
+ *cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE));
+ *cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1);
+
+ /* Predicate the jump. */
+ *cs++ = (INTEL_GEN(i915) < 8 ?
+ MI_BATCH_BUFFER_START :
+ MI_BATCH_BUFFER_START_GEN8) |
+ MI_BATCH_PREDICATE;
+ *cs++ = i915_ggtt_offset(vma) + (jump - batch) * 4;
+ *cs++ = 0;
+
+ /* Restore registers. */
+ for (i = 0; i < N_CS_GPR; i++)
+ cs = save_restore_register(
+ stream, cs, false /* restore */, CS_GPR(i),
+ INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2);
+ cs = save_restore_register(
+ stream, cs, false /* restore */, MI_PREDICATE_RESULT_1,
+ INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1);
+
+ /* And return to the ring. */
+ *cs++ = MI_BATCH_BUFFER_END;
+
+ GEM_BUG_ON(cs - batch > PAGE_SIZE / sizeof(*batch));
+
+ i915_gem_object_flush_map(bo);
+ i915_gem_object_unpin_map(bo);
+
+ stream->noa_wait = vma;
+ return 0;
+
+err_unpin:
+ i915_vma_unpin_and_release(&vma, 0);
+err_unref:
+ i915_gem_object_put(bo);
+ return ret;
+}
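
For reference, the control flow encoded by the batch above is roughly the
following host-side sketch (not the batch itself; read_cs_timestamp() is a
hypothetical stand-in for the RING_TIMESTAMP reads, and the real batch
compares via the carry flag of an MI_MATH ADD against the complemented delay
rather than a direct '>=' test):

	#include <stdint.h>

	extern uint32_t read_cs_timestamp(void);	/* assumed helper */

	static void noa_wait_equivalent(uint32_t delay_ticks)
	{
		uint32_t start, now, delta;

	restart:
		start = read_cs_timestamp();	/* START_TS, low 32 bits */
		do {
			now = read_cs_timestamp();	/* NOW_TS, low 32 bits */
			delta = now - start;		/* MI_MATH SUB -> DELTA_TS */
			if (now < start)		/* carry: timestamp rolled over */
				goto restart;		/* first predicated BB_START */
		} while (delta < delay_ticks);		/* second predicated BB_START */
	}
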
+
+static u32 *write_cs_mi_lri(u32 *cs,
+ const struct i915_oa_reg *reg_data,
+ u32 n_regs)
{
u32 i;
for (i = 0; i < n_regs; i++) {
- const struct i915_oa_reg *reg = regs + i;
+ if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) {
+ u32 n_lri = min_t(u32,
+ n_regs - i,
+ MI_LOAD_REGISTER_IMM_MAX_REGS);
- I915_WRITE(reg->addr, reg->value);
+ *cs++ = MI_LOAD_REGISTER_IMM(n_lri);
+ }
+ *cs++ = i915_mmio_reg_offset(reg_data[i].addr);
+ *cs++ = reg_data[i].value;
}
+
+ return cs;
}
-static void delay_after_mux(void)
+static int num_lri_dwords(int num_regs)
{
+ int count = 0;
+
+ if (num_regs > 0) {
+ count += DIV_ROUND_UP(num_regs, MI_LOAD_REGISTER_IMM_MAX_REGS);
+ count += num_regs * 2;
+ }
+
+ return count;
+}
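
As a worked sizing example (assuming MI_LOAD_REGISTER_IMM_MAX_REGS is 126, as
defined in the driver's GPU command headers), a config with 150 mux registers
needs two MI_LOAD_REGISTER_IMM headers plus two dwords per register:

	num_lri_dwords(150) = DIV_ROUND_UP(150, 126) + 150 * 2
	                    = 2 + 300
	                    = 302 dwords
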
+
+static struct i915_oa_config_bo *
+alloc_oa_config_buffer(struct i915_perf_stream *stream,
+ struct i915_oa_config *oa_config)
+{
+ struct drm_i915_gem_object *obj;
+ struct i915_oa_config_bo *oa_bo;
+ size_t config_length = 0;
+ u32 *cs;
+ int err;
+
+ oa_bo = kzalloc(sizeof(*oa_bo), GFP_KERNEL);
+ if (!oa_bo)
+ return ERR_PTR(-ENOMEM);
+
+ config_length += num_lri_dwords(oa_config->mux_regs_len);
+ config_length += num_lri_dwords(oa_config->b_counter_regs_len);
+ config_length += num_lri_dwords(oa_config->flex_regs_len);
+ config_length++; /* MI_BATCH_BUFFER_END */
+ config_length = ALIGN(sizeof(u32) * config_length, I915_GTT_PAGE_SIZE);
+
+ obj = i915_gem_object_create_shmem(stream->perf->i915, config_length);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ goto err_free;
+ }
+
+ cs = i915_gem_object_pin_map(obj, I915_MAP_WB);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ goto err_oa_bo;
+ }
+
+ cs = write_cs_mi_lri(cs,
+ oa_config->mux_regs,
+ oa_config->mux_regs_len);
+ cs = write_cs_mi_lri(cs,
+ oa_config->b_counter_regs,
+ oa_config->b_counter_regs_len);
+ cs = write_cs_mi_lri(cs,
+ oa_config->flex_regs,
+ oa_config->flex_regs_len);
+
+ *cs++ = MI_BATCH_BUFFER_END;
+
+ i915_gem_object_flush_map(obj);
+ i915_gem_object_unpin_map(obj);
+
+ oa_bo->vma = i915_vma_instance(obj,
+ &stream->engine->gt->ggtt->vm,
+ NULL);
+ if (IS_ERR(oa_bo->vma)) {
+ err = PTR_ERR(oa_bo->vma);
+ goto err_oa_bo;
+ }
+
+ oa_bo->oa_config = i915_oa_config_get(oa_config);
+ llist_add(&oa_bo->node, &stream->oa_config_bos);
+
+ return oa_bo;
+
+err_oa_bo:
+ i915_gem_object_put(obj);
+err_free:
+ kfree(oa_bo);
+ return ERR_PTR(err);
+}
+
+static struct i915_vma *
+get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config)
+{
+ struct i915_oa_config_bo *oa_bo;
+
/*
- * It apparently takes a fairly long time for a new MUX
- * configuration to be be applied after these register writes.
- * This delay duration was derived empirically based on the
- * render_basic config but hopefully it covers the maximum
- * configuration latency.
- *
- * As a fallback, the checks in _append_oa_reports() to skip
- * invalid OA reports do also seem to work to discard reports
- * generated before this config has completed - albeit not
- * silently.
- *
- * Unfortunately this is essentially a magic number, since we
- * don't currently know of a reliable mechanism for predicting
- * how long the MUX config will take to apply and besides
- * seeing invalid reports we don't know of a reliable way to
- * explicitly check that the MUX config has landed.
- *
- * It's even possible we've miss characterized the underlying
- * problem - it just seems like the simplest explanation why
- * a delay at this location would mitigate any invalid reports.
+ * Look for the buffer in the already allocated BOs attached
+ * to the stream.
*/
- usleep_range(15000, 20000);
+ llist_for_each_entry(oa_bo, stream->oa_config_bos.first, node) {
+ if (oa_bo->oa_config == oa_config &&
+ memcmp(oa_bo->oa_config->uuid,
+ oa_config->uuid,
+ sizeof(oa_config->uuid)) == 0)
+ goto out;
+ }
+
+ oa_bo = alloc_oa_config_buffer(stream, oa_config);
+ if (IS_ERR(oa_bo))
+ return ERR_CAST(oa_bo);
+
+out:
+ return i915_vma_get(oa_bo->vma);
+}
+
+static int emit_oa_config(struct i915_perf_stream *stream,
+ struct i915_oa_config *oa_config,
+ struct intel_context *ce)
+{
+ struct i915_request *rq;
+ struct i915_vma *vma;
+ int err;
+
+ vma = get_oa_vma(stream, oa_config);
+ if (IS_ERR(vma))
+ return PTR_ERR(vma);
+
+ err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+ if (err)
+ goto err_vma_put;
+
+ rq = i915_request_create(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_vma_unpin;
+ }
+
+ i915_vma_lock(vma);
+ err = i915_request_await_object(rq, vma->obj, 0);
+ if (!err)
+ err = i915_vma_move_to_active(vma, rq, 0);
+ i915_vma_unlock(vma);
+ if (err)
+ goto err_add_request;
+
+ err = rq->engine->emit_bb_start(rq,
+ vma->node.start, 0,
+ I915_DISPATCH_SECURE);
+err_add_request:
+ i915_request_add(rq);
+err_vma_unpin:
+ i915_vma_unpin(vma);
+err_vma_put:
+ i915_vma_put(vma);
+ return err;
+}
+
+static struct intel_context *oa_context(struct i915_perf_stream *stream)
+{
+ return stream->pinned_ctx ?: stream->engine->kernel_context;
}
static int hsw_enable_metric_set(struct i915_perf_stream *stream)
{
- struct drm_i915_private *dev_priv = stream->dev_priv;
- const struct i915_oa_config *oa_config = stream->oa_config;
+ struct intel_uncore *uncore = stream->uncore;
/*
* PRM:
@@ -1588,31 +1923,24 @@ static int hsw_enable_metric_set(struct i915_perf_stream *stream)
* count the events from non-render domain. Unit level clock
* gating for RCS should also be disabled.
*/
- I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) &
- ~GEN7_DOP_CLOCK_GATE_ENABLE));
- I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) |
- GEN6_CSUNIT_CLOCK_GATE_DISABLE));
+ intel_uncore_rmw(uncore, GEN7_MISCCPCTL,
+ GEN7_DOP_CLOCK_GATE_ENABLE, 0);
+ intel_uncore_rmw(uncore, GEN6_UCGCTL1,
+ 0, GEN6_CSUNIT_CLOCK_GATE_DISABLE);
- config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len);
- delay_after_mux();
-
- config_oa_regs(dev_priv, oa_config->b_counter_regs,
- oa_config->b_counter_regs_len);
-
- return 0;
+ return emit_oa_config(stream, stream->oa_config, oa_context(stream));
}
static void hsw_disable_metric_set(struct i915_perf_stream *stream)
{
- struct drm_i915_private *dev_priv = stream->dev_priv;
+ struct intel_uncore *uncore = stream->uncore;
- I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) &
- ~GEN6_CSUNIT_CLOCK_GATE_DISABLE));
- I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) |
- GEN7_DOP_CLOCK_GATE_ENABLE));
+ intel_uncore_rmw(uncore, GEN6_UCGCTL1,
+ GEN6_CSUNIT_CLOCK_GATE_DISABLE, 0);
+ intel_uncore_rmw(uncore, GEN7_MISCCPCTL,
+ 0, GEN7_DOP_CLOCK_GATE_ENABLE);
- I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) &
- ~GT_NOA_ENABLE));
+ intel_uncore_rmw(uncore, GDT_CHICKEN_BITS, GT_NOA_ENABLE, 0);
}
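
The conversions above rely on intel_uncore_rmw(uncore, reg, clear, set) taking
separate clear and set masks; conceptually it is a read-modify-write helper
along these lines (a simplified sketch of its behaviour):

	static void rmw_equivalent(struct intel_uncore *uncore,
				   i915_reg_t reg, u32 clear, u32 set)
	{
		u32 old = intel_uncore_read(uncore, reg);

		intel_uncore_write(uncore, reg, (old & ~clear) | set);
	}

so e.g. intel_uncore_rmw(uncore, GDT_CHICKEN_BITS, GT_NOA_ENABLE, 0) clears
GT_NOA_ENABLE, matching the old I915_READ()/I915_WRITE() sequences it replaces.
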
static u32 oa_config_flex_reg(const struct i915_oa_config *oa_config,
@@ -1647,9 +1975,8 @@ static void
gen8_update_reg_state_unlocked(const struct intel_context *ce,
const struct i915_perf_stream *stream)
{
- struct drm_i915_private *i915 = ce->engine->i915;
- u32 ctx_oactxctrl = i915->perf.ctx_oactxctrl_offset;
- u32 ctx_flexeu0 = i915->perf.ctx_flexeu0_offset;
+ u32 ctx_oactxctrl = stream->perf->ctx_oactxctrl_offset;
+ u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
/* The MMIO offsets for Flex EU registers aren't contiguous */
i915_reg_t flex_regs[] = {
EU_PERF_CNTL0,
@@ -1672,7 +1999,8 @@ gen8_update_reg_state_unlocked(const struct intel_context *ce,
reg_state[ctx_flexeu0 + i * 2 + 1] =
oa_config_flex_reg(stream->oa_config, flex_regs[i]);
- reg_state[CTX_R_PWR_CLK_STATE] = intel_sseu_make_rpcs(i915, &ce->sseu);
+ reg_state[CTX_R_PWR_CLK_STATE] =
+ intel_sseu_make_rpcs(ce->engine->i915, &ce->sseu);
}
struct flex {
@@ -1827,9 +2155,9 @@ static int gen8_configure_context(struct i915_gem_context *ctx,
static int gen8_configure_all_contexts(struct i915_perf_stream *stream,
const struct i915_oa_config *oa_config)
{
- struct drm_i915_private *i915 = stream->dev_priv;
+ struct drm_i915_private *i915 = stream->perf->i915;
/* The MMIO offsets for Flex EU registers aren't contiguous */
- const u32 ctx_flexeu0 = i915->perf.ctx_flexeu0_offset;
+ const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
struct flex regs[] = {
{
@@ -1838,7 +2166,7 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream,
},
{
GEN8_OACTXCONTROL,
- i915->perf.ctx_oactxctrl_offset + 1,
+ stream->perf->ctx_oactxctrl_offset + 1,
((stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
(stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
GEN8_OA_COUNTER_RESUME)
@@ -1859,7 +2187,7 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream,
for (i = 2; i < ARRAY_SIZE(regs); i++)
regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
- lockdep_assert_held(&i915->drm.struct_mutex);
+ lockdep_assert_held(&stream->perf->lock);
/*
* The OA register config is setup through the context image. This image
@@ -1922,8 +2250,8 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream,
static int gen8_enable_metric_set(struct i915_perf_stream *stream)
{
- struct drm_i915_private *dev_priv = stream->dev_priv;
- const struct i915_oa_config *oa_config = stream->oa_config;
+ struct intel_uncore *uncore = stream->uncore;
+ struct i915_oa_config *oa_config = stream->oa_config;
int ret;
/*
@@ -1949,10 +2277,10 @@ static int gen8_enable_metric_set(struct i915_perf_stream *stream)
* be read back from automatically triggered reports, as part of the
* RPT_ID field.
*/
- if (IS_GEN_RANGE(dev_priv, 9, 11)) {
- I915_WRITE(GEN8_OA_DEBUG,
- _MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS |
- GEN9_OA_DEBUG_INCLUDE_CLK_RATIO));
+ if (IS_GEN_RANGE(stream->perf->i915, 9, 11)) {
+ intel_uncore_write(uncore, GEN8_OA_DEBUG,
+ _MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS |
+ GEN9_OA_DEBUG_INCLUDE_CLK_RATIO));
}
/*
@@ -1964,41 +2292,33 @@ static int gen8_enable_metric_set(struct i915_perf_stream *stream)
if (ret)
return ret;
- config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len);
- delay_after_mux();
-
- config_oa_regs(dev_priv, oa_config->b_counter_regs,
- oa_config->b_counter_regs_len);
-
- return 0;
+ return emit_oa_config(stream, oa_config, oa_context(stream));
}
static void gen8_disable_metric_set(struct i915_perf_stream *stream)
{
- struct drm_i915_private *dev_priv = stream->dev_priv;
+ struct intel_uncore *uncore = stream->uncore;
/* Reset all contexts' slices/subslices configurations. */
gen8_configure_all_contexts(stream, NULL);
- I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) &
- ~GT_NOA_ENABLE));
+ intel_uncore_rmw(uncore, GDT_CHICKEN_BITS, GT_NOA_ENABLE, 0);
}
static void gen10_disable_metric_set(struct i915_perf_stream *stream)
{
- struct drm_i915_private *dev_priv = stream->dev_priv;
+ struct intel_uncore *uncore = stream->uncore;
/* Reset all contexts' slices/subslices configurations. */
gen8_configure_all_contexts(stream, NULL);
/* Make sure we disable noa to save power. */
- I915_WRITE(RPM_CONFIG1,
- I915_READ(RPM_CONFIG1) & ~GEN10_GT_NOA_ENABLE);
+ intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
}
static void gen7_oa_enable(struct i915_perf_stream *stream)
{
- struct drm_i915_private *dev_priv = stream->dev_priv;
+ struct intel_uncore *uncore = stream->uncore;
struct i915_gem_context *ctx = stream->ctx;
u32 ctx_id = stream->specific_ctx_id;
bool periodic = stream->periodic;
@@ -2016,19 +2336,19 @@ static void gen7_oa_enable(struct i915_perf_stream *stream)
*/
gen7_init_oa_buffer(stream);
- I915_WRITE(GEN7_OACONTROL,
- (ctx_id & GEN7_OACONTROL_CTX_MASK) |
- (period_exponent <<
- GEN7_OACONTROL_TIMER_PERIOD_SHIFT) |
- (periodic ? GEN7_OACONTROL_TIMER_ENABLE : 0) |
- (report_format << GEN7_OACONTROL_FORMAT_SHIFT) |
- (ctx ? GEN7_OACONTROL_PER_CTX_ENABLE : 0) |
- GEN7_OACONTROL_ENABLE);
+ intel_uncore_write(uncore, GEN7_OACONTROL,
+ (ctx_id & GEN7_OACONTROL_CTX_MASK) |
+ (period_exponent <<
+ GEN7_OACONTROL_TIMER_PERIOD_SHIFT) |
+ (periodic ? GEN7_OACONTROL_TIMER_ENABLE : 0) |
+ (report_format << GEN7_OACONTROL_FORMAT_SHIFT) |
+ (ctx ? GEN7_OACONTROL_PER_CTX_ENABLE : 0) |
+ GEN7_OACONTROL_ENABLE);
}
static void gen8_oa_enable(struct i915_perf_stream *stream)
{
- struct drm_i915_private *dev_priv = stream->dev_priv;
+ struct intel_uncore *uncore = stream->uncore;
u32 report_format = stream->oa_buffer.format;
/*
@@ -2047,9 +2367,9 @@ static void gen8_oa_enable(struct i915_perf_stream *stream)
* filtering and instead filter on the cpu based on the context-id
* field of reports
*/
- I915_WRITE(GEN8_OACONTROL, (report_format <<
- GEN8_OA_REPORT_FORMAT_SHIFT) |
- GEN8_OA_COUNTER_ENABLE);
+ intel_uncore_write(uncore, GEN8_OACONTROL,
+ (report_format << GEN8_OA_REPORT_FORMAT_SHIFT) |
+ GEN8_OA_COUNTER_ENABLE);
}
/**
@@ -2063,9 +2383,7 @@ static void gen8_oa_enable(struct i915_perf_stream *stream)
*/
static void i915_oa_stream_enable(struct i915_perf_stream *stream)
{
- struct drm_i915_private *dev_priv = stream->dev_priv;
-
- dev_priv->perf.ops.oa_enable(stream);
+ stream->perf->ops.oa_enable(stream);
if (stream->periodic)
hrtimer_start(&stream->poll_check_timer,
@@ -2075,7 +2393,7 @@ static void i915_oa_stream_enable(struct i915_perf_stream *stream)
static void gen7_oa_disable(struct i915_perf_stream *stream)
{
- struct intel_uncore *uncore = &stream->dev_priv->uncore;
+ struct intel_uncore *uncore = stream->uncore;
intel_uncore_write(uncore, GEN7_OACONTROL, 0);
if (intel_wait_for_register(uncore,
@@ -2086,7 +2404,7 @@ static void gen7_oa_disable(struct i915_perf_stream *stream)
static void gen8_oa_disable(struct i915_perf_stream *stream)
{
- struct intel_uncore *uncore = &stream->dev_priv->uncore;
+ struct intel_uncore *uncore = stream->uncore;
intel_uncore_write(uncore, GEN8_OACONTROL, 0);
if (intel_wait_for_register(uncore,
@@ -2105,9 +2423,7 @@ static void gen8_oa_disable(struct i915_perf_stream *stream)
*/
static void i915_oa_stream_disable(struct i915_perf_stream *stream)
{
- struct drm_i915_private *dev_priv = stream->dev_priv;
-
- dev_priv->perf.ops.oa_disable(stream);
+ stream->perf->ops.oa_disable(stream);
if (stream->periodic)
hrtimer_cancel(&stream->poll_check_timer);
@@ -2144,15 +2460,21 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
struct drm_i915_perf_open_param *param,
struct perf_open_properties *props)
{
- struct drm_i915_private *dev_priv = stream->dev_priv;
+ struct i915_perf *perf = stream->perf;
int format_size;
int ret;
- /* If the sysfs metrics/ directory wasn't registered for some
+ if (!props->engine) {
+ DRM_DEBUG("OA engine not specified\n");
+ return -EINVAL;
+ }
+
+ /*
+ * If the sysfs metrics/ directory wasn't registered for some
* reason then don't let userspace try their luck with config
* IDs
*/
- if (!dev_priv->perf.metrics_kobj) {
+ if (!perf->metrics_kobj) {
DRM_DEBUG("OA metrics weren't advertised via sysfs\n");
return -EINVAL;
}
@@ -2162,16 +2484,17 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
return -EINVAL;
}
- if (!dev_priv->perf.ops.enable_metric_set) {
+ if (!perf->ops.enable_metric_set) {
DRM_DEBUG("OA unit not supported\n");
return -ENODEV;
}
- /* To avoid the complexity of having to accurately filter
+ /*
+ * To avoid the complexity of having to accurately filter
* counter reports and marshal to the appropriate client
* we currently only allow exclusive access
*/
- if (dev_priv->perf.exclusive_stream) {
+ if (perf->exclusive_stream) {
DRM_DEBUG("OA unit already in use\n");
return -EBUSY;
}
@@ -2181,9 +2504,12 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
return -EINVAL;
}
+ stream->engine = props->engine;
+ stream->uncore = stream->engine->gt->uncore;
+
stream->sample_size = sizeof(struct drm_i915_perf_record_header);
- format_size = dev_priv->perf.oa_formats[props->oa_format].size;
+ format_size = perf->oa_formats[props->oa_format].size;
stream->sample_flags |= SAMPLE_OA_REPORT;
stream->sample_size += format_size;
@@ -2192,8 +2518,10 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
if (WARN_ON(stream->oa_buffer.format_size == 0))
return -EINVAL;
+ stream->hold_preemption = props->hold_preemption;
+
stream->oa_buffer.format =
- dev_priv->perf.oa_formats[props->oa_format].format;
+ perf->oa_formats[props->oa_format].format;
stream->periodic = props->oa_periodic;
if (stream->periodic)
@@ -2207,9 +2535,16 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
}
}
- ret = get_oa_config(dev_priv, props->metrics_set, &stream->oa_config);
+ ret = alloc_noa_wait(stream);
if (ret) {
+ DRM_DEBUG("Unable to allocate NOA wait batch buffer\n");
+ goto err_noa_wait_alloc;
+ }
+
+ stream->oa_config = i915_perf_get_oa_config(perf, props->metrics_set);
+ if (!stream->oa_config) {
DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set);
+ ret = -EINVAL;
goto err_config;
}
@@ -2225,27 +2560,24 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
* In our case we are expecting that taking pm + FORCEWAKE
* references will effectively disable RC6.
*/
- stream->wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
- intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
+ intel_engine_pm_get(stream->engine);
+ intel_uncore_forcewake_get(stream->uncore, FORCEWAKE_ALL);
ret = alloc_oa_buffer(stream);
if (ret)
goto err_oa_buf_alloc;
- ret = i915_mutex_lock_interruptible(&dev_priv->drm);
- if (ret)
- goto err_lock;
-
stream->ops = &i915_oa_stream_ops;
- dev_priv->perf.exclusive_stream = stream;
+ perf->exclusive_stream = stream;
- ret = dev_priv->perf.ops.enable_metric_set(stream);
+ ret = perf->ops.enable_metric_set(stream);
if (ret) {
DRM_DEBUG("Unable to enable metric set\n");
goto err_enable;
}
- mutex_unlock(&dev_priv->drm.struct_mutex);
+ DRM_DEBUG("opening stream oa config uuid=%s\n",
+ stream->oa_config->uuid);
hrtimer_init(&stream->poll_check_timer,
CLOCK_MONOTONIC, HRTIMER_MODE_REL);
@@ -2256,20 +2588,21 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
return 0;
err_enable:
- dev_priv->perf.exclusive_stream = NULL;
- dev_priv->perf.ops.disable_metric_set(stream);
- mutex_unlock(&dev_priv->drm.struct_mutex);
+ perf->exclusive_stream = NULL;
+ perf->ops.disable_metric_set(stream);
-err_lock:
free_oa_buffer(stream);
err_oa_buf_alloc:
- put_oa_config(dev_priv, stream->oa_config);
+ free_oa_configs(stream);
- intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
- intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref);
+ intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL);
+ intel_engine_pm_put(stream->engine);
err_config:
+ free_noa_wait(stream);
+
+err_noa_wait_alloc:
if (stream->ctx)
oa_put_render_ctx_id(stream);
@@ -2359,7 +2692,7 @@ static ssize_t i915_perf_read(struct file *file,
loff_t *ppos)
{
struct i915_perf_stream *stream = file->private_data;
- struct drm_i915_private *dev_priv = stream->dev_priv;
+ struct i915_perf *perf = stream->perf;
ssize_t ret;
/* To ensure it's handled consistently we simply treat all reads of a
@@ -2382,15 +2715,15 @@ static ssize_t i915_perf_read(struct file *file,
if (ret)
return ret;
- mutex_lock(&dev_priv->perf.lock);
+ mutex_lock(&perf->lock);
ret = i915_perf_read_locked(stream, file,
buf, count, ppos);
- mutex_unlock(&dev_priv->perf.lock);
+ mutex_unlock(&perf->lock);
} while (ret == -EAGAIN);
} else {
- mutex_lock(&dev_priv->perf.lock);
+ mutex_lock(&perf->lock);
ret = i915_perf_read_locked(stream, file, buf, count, ppos);
- mutex_unlock(&dev_priv->perf.lock);
+ mutex_unlock(&perf->lock);
}
/* We allow the poll checking to sometimes report false positive EPOLLIN
@@ -2428,7 +2761,6 @@ static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer)
/**
* i915_perf_poll_locked - poll_wait() with a suitable wait queue for stream
- * @dev_priv: i915 device instance
* @stream: An i915 perf stream
* @file: An i915 perf stream file
* @wait: poll() state table
@@ -2437,15 +2769,14 @@ static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer)
* &i915_perf_stream_ops->poll_wait to call poll_wait() with a wait queue that
* will be woken for new stream data.
*
- * Note: The &drm_i915_private->perf.lock mutex has been taken to serialize
+ * Note: The &perf->lock mutex has been taken to serialize
* with any non-file-operation driver hooks.
*
* Returns: any poll events that are ready without sleeping
*/
-static __poll_t i915_perf_poll_locked(struct drm_i915_private *dev_priv,
- struct i915_perf_stream *stream,
- struct file *file,
- poll_table *wait)
+static __poll_t i915_perf_poll_locked(struct i915_perf_stream *stream,
+ struct file *file,
+ poll_table *wait)
{
__poll_t events = 0;
@@ -2479,12 +2810,12 @@ static __poll_t i915_perf_poll_locked(struct drm_i915_private *dev_priv,
static __poll_t i915_perf_poll(struct file *file, poll_table *wait)
{
struct i915_perf_stream *stream = file->private_data;
- struct drm_i915_private *dev_priv = stream->dev_priv;
+ struct i915_perf *perf = stream->perf;
__poll_t ret;
- mutex_lock(&dev_priv->perf.lock);
- ret = i915_perf_poll_locked(dev_priv, stream, file, wait);
- mutex_unlock(&dev_priv->perf.lock);
+ mutex_lock(&perf->lock);
+ ret = i915_perf_poll_locked(stream, file, wait);
+ mutex_unlock(&perf->lock);
return ret;
}
@@ -2509,6 +2840,9 @@ static void i915_perf_enable_locked(struct i915_perf_stream *stream)
if (stream->ops->enable)
stream->ops->enable(stream);
+
+ if (stream->hold_preemption)
+ i915_gem_context_set_nopreempt(stream->ctx);
}
/**
@@ -2533,17 +2867,54 @@ static void i915_perf_disable_locked(struct i915_perf_stream *stream)
/* Allow stream->ops->disable() to refer to this */
stream->enabled = false;
+ if (stream->hold_preemption)
+ i915_gem_context_clear_nopreempt(stream->ctx);
+
if (stream->ops->disable)
stream->ops->disable(stream);
}
+static long i915_perf_config_locked(struct i915_perf_stream *stream,
+ unsigned long metrics_set)
+{
+ struct i915_oa_config *config;
+ long ret = stream->oa_config->id;
+
+ config = i915_perf_get_oa_config(stream->perf, metrics_set);
+ if (!config)
+ return -EINVAL;
+
+ if (config != stream->oa_config) {
+ int err;
+
+ /*
+ * If OA is bound to a specific context, emit the
+ * reconfiguration inline from that context. The update
+ * will then be ordered with respect to submission on that
+ * context.
+ *
+ * When set globally, we use a low priority kernel context,
+ * so it will effectively take effect when idle.
+ */
+ err = emit_oa_config(stream, config, oa_context(stream));
+ if (err == 0)
+ config = xchg(&stream->oa_config, config);
+ else
+ ret = err;
+ }
+
+ i915_oa_config_put(config);
+
+ return ret;
+}
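
From userspace, switching the active metric set on an already open stream then
becomes a single ioctl on the stream fd (a hypothetical sketch;
new_metrics_set is an id previously returned by DRM_IOCTL_I915_PERF_ADD_CONFIG
or read from sysfs, and the i915_drm.h include path may vary with libdrm):

	#include <sys/ioctl.h>
	#include <drm/i915_drm.h>

	static long switch_oa_config(int stream_fd, unsigned long new_metrics_set)
	{
		/* Returns an OA config id on success, negative errno on error. */
		return ioctl(stream_fd, I915_PERF_IOCTL_CONFIG, new_metrics_set);
	}
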
+
/**
* i915_perf_ioctl - support ioctl() usage with i915 perf stream FDs
* @stream: An i915 perf stream
* @cmd: the ioctl request
* @arg: the ioctl data
*
- * Note: The &drm_i915_private->perf.lock mutex has been taken to serialize
+ * Note: The &perf->lock mutex has been taken to serialize
* with any non-file-operation driver hooks.
*
* Returns: zero on success or a negative error code. Returns -EINVAL for
@@ -2560,6 +2931,8 @@ static long i915_perf_ioctl_locked(struct i915_perf_stream *stream,
case I915_PERF_IOCTL_DISABLE:
i915_perf_disable_locked(stream);
return 0;
+ case I915_PERF_IOCTL_CONFIG:
+ return i915_perf_config_locked(stream, arg);
}
return -EINVAL;
@@ -2581,12 +2954,12 @@ static long i915_perf_ioctl(struct file *file,
unsigned long arg)
{
struct i915_perf_stream *stream = file->private_data;
- struct drm_i915_private *dev_priv = stream->dev_priv;
+ struct i915_perf *perf = stream->perf;
long ret;
- mutex_lock(&dev_priv->perf.lock);
+ mutex_lock(&perf->lock);
ret = i915_perf_ioctl_locked(stream, cmd, arg);
- mutex_unlock(&dev_priv->perf.lock);
+ mutex_unlock(&perf->lock);
return ret;
}
@@ -2598,7 +2971,7 @@ static long i915_perf_ioctl(struct file *file,
* Frees all resources associated with the given i915 perf @stream, disabling
* any associated data capture in the process.
*
- * Note: The &drm_i915_private->perf.lock mutex has been taken to serialize
+ * Note: The &perf->lock mutex has been taken to serialize
* with any non-file-operation driver hooks.
*/
static void i915_perf_destroy_locked(struct i915_perf_stream *stream)
@@ -2609,8 +2982,6 @@ static void i915_perf_destroy_locked(struct i915_perf_stream *stream)
if (stream->ops->destroy)
stream->ops->destroy(stream);
- list_del(&stream->link);
-
if (stream->ctx)
i915_gem_context_put(stream->ctx);
@@ -2631,14 +3002,14 @@ static void i915_perf_destroy_locked(struct i915_perf_stream *stream)
static int i915_perf_release(struct inode *inode, struct file *file)
{
struct i915_perf_stream *stream = file->private_data;
- struct drm_i915_private *dev_priv = stream->dev_priv;
+ struct i915_perf *perf = stream->perf;
- mutex_lock(&dev_priv->perf.lock);
+ mutex_lock(&perf->lock);
i915_perf_destroy_locked(stream);
- mutex_unlock(&dev_priv->perf.lock);
+ mutex_unlock(&perf->lock);
/* Release the reference the perf stream kept on the driver. */
- drm_dev_put(&dev_priv->drm);
+ drm_dev_put(&perf->i915->drm);
return 0;
}
@@ -2660,7 +3031,7 @@ static const struct file_operations fops = {
/**
* i915_perf_open_ioctl_locked - DRM ioctl() for userspace to open a stream FD
- * @dev_priv: i915 device instance
+ * @perf: i915 perf instance
* @param: The open parameters passed to 'DRM_I915_PERF_OPEN`
* @props: individually validated u64 property value pairs
* @file: drm file
@@ -2668,7 +3039,7 @@ static const struct file_operations fops = {
* See i915_perf_ioctl_open() for interface details.
*
* Implements further stream config validation and stream initialization on
- * behalf of i915_perf_open_ioctl() with the &drm_i915_private->perf.lock mutex
+ * behalf of i915_perf_open_ioctl() with the &perf->lock mutex
* taken to serialize with any non-file-operation driver hooks.
*
* Note: at this point the @props have only been validated in isolation and
@@ -2683,7 +3054,7 @@ static const struct file_operations fops = {
* Returns: zero on success or a negative error code.
*/
static int
-i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
+i915_perf_open_ioctl_locked(struct i915_perf *perf,
struct drm_i915_perf_open_param *param,
struct perf_open_properties *props,
struct drm_file *file)
@@ -2708,6 +3079,15 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
}
}
+ if (props->hold_preemption) {
+ if (!props->single_context) {
+ DRM_DEBUG("preemption disable with no context\n");
+ ret = -EINVAL;
+ goto err;
+ }
+ privileged_op = true;
+ }
+
/*
* On Haswell the OA unit supports clock gating off for a specific
* context and in this mode there's no visibility of metrics for the
@@ -2722,7 +3102,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
* MI_REPORT_PERF_COUNT commands and so consider it a privileged op to
* enable the OA unit by default.
*/
- if (IS_HASWELL(dev_priv) && specific_ctx)
+ if (IS_HASWELL(perf->i915) && specific_ctx && !props->hold_preemption)
privileged_op = false;
/* Similar to perf's kernel.perf_paranoid_cpu sysctl option
@@ -2732,7 +3112,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
*/
if (privileged_op &&
i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) {
- DRM_DEBUG("Insufficient privileges to open system-wide i915 perf stream\n");
+ DRM_DEBUG("Insufficient privileges to open i915 perf stream\n");
ret = -EACCES;
goto err_ctx;
}
@@ -2743,7 +3123,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
goto err_ctx;
}
- stream->dev_priv = dev_priv;
+ stream->perf = perf;
stream->ctx = specific_ctx;
ret = i915_oa_stream_init(stream, param, props);
@@ -2759,8 +3139,6 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
goto err_flags;
}
- list_add(&stream->link, &dev_priv->perf.streams);
-
if (param->flags & I915_PERF_FLAG_FD_CLOEXEC)
f_flags |= O_CLOEXEC;
if (param->flags & I915_PERF_FLAG_FD_NONBLOCK)
@@ -2769,7 +3147,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
stream_fd = anon_inode_getfd("[i915_perf]", &fops, stream, f_flags);
if (stream_fd < 0) {
ret = stream_fd;
- goto err_open;
+ goto err_flags;
}
if (!(param->flags & I915_PERF_FLAG_DISABLED))
@@ -2778,12 +3156,10 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
/* Take a reference on the driver that will be kept with stream_fd
* until its release.
*/
- drm_dev_get(&dev_priv->drm);
+ drm_dev_get(&perf->i915->drm);
return stream_fd;
-err_open:
- list_del(&stream->link);
err_flags:
if (stream->ops->destroy)
stream->ops->destroy(stream);
@@ -2796,15 +3172,15 @@ err:
return ret;
}
-static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent)
+static u64 oa_exponent_to_ns(struct i915_perf *perf, int exponent)
{
return div64_u64(1000000000ULL * (2ULL << exponent),
- 1000ULL * RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz);
+ 1000ULL * RUNTIME_INFO(perf->i915)->cs_timestamp_frequency_khz);
}
/**
* read_properties_unlocked - validate + copy userspace stream open properties
- * @dev_priv: i915 device instance
+ * @perf: i915 perf instance
* @uprops: The array of u64 key value pairs given by userspace
* @n_props: The number of key value pairs expected in @uprops
* @props: The stream configuration built up while validating properties
@@ -2817,7 +3193,7 @@ static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent)
* we shouldn't validate or assume anything about ordering here. This doesn't
* rule out defining new properties with ordering requirements in the future.
*/
-static int read_properties_unlocked(struct drm_i915_private *dev_priv,
+static int read_properties_unlocked(struct i915_perf *perf,
u64 __user *uprops,
u32 n_props,
struct perf_open_properties *props)
@@ -2832,6 +3208,15 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
return -EINVAL;
}
+ /* At the moment we only support using i915-perf on the RCS. */
+ props->engine = intel_engine_lookup_user(perf->i915,
+ I915_ENGINE_CLASS_RENDER,
+ 0);
+ if (!props->engine) {
+ DRM_DEBUG("No RENDER-capable engines\n");
+ return -EINVAL;
+ }
+
/* Considering that ID = 0 is reserved and assuming that we don't
* (currently) expect any configurations to ever specify duplicate
* values for a particular property ID then the last _PROP_MAX value is
@@ -2883,7 +3268,7 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
value);
return -EINVAL;
}
- if (!dev_priv->perf.oa_formats[value].size) {
+ if (!perf->oa_formats[value].size) {
DRM_DEBUG("Unsupported OA report format %llu\n",
value);
return -EINVAL;
@@ -2904,7 +3289,7 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
*/
BUILD_BUG_ON(sizeof(oa_period) != 8);
- oa_period = oa_exponent_to_ns(dev_priv, value);
+ oa_period = oa_exponent_to_ns(perf, value);
/* This check is primarily to ensure that oa_period <=
* UINT32_MAX (before passing to do_div which only
@@ -2929,6 +3314,9 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
props->oa_periodic = true;
props->oa_period_exponent = value;
break;
+ case DRM_I915_PERF_PROP_HOLD_PREEMPTION:
+ props->hold_preemption = !!value;
+ break;
case DRM_I915_PERF_PROP_MAX:
MISSING_CASE(id);
return -EINVAL;
@@ -2958,7 +3346,7 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
* mutex to avoid an awkward lockdep with mmap_sem.
*
* Most of the implementation details are handled by
- * i915_perf_open_ioctl_locked() after taking the &drm_i915_private->perf.lock
+ * i915_perf_open_ioctl_locked() after taking the &perf->lock
* mutex for serializing with any non-file-operation driver hooks.
*
* Return: A newly opened i915 Perf stream file descriptor or negative
@@ -2967,13 +3355,13 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
int i915_perf_open_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
- struct drm_i915_private *dev_priv = dev->dev_private;
+ struct i915_perf *perf = &to_i915(dev)->perf;
struct drm_i915_perf_open_param *param = data;
struct perf_open_properties props;
u32 known_open_flags;
int ret;
- if (!dev_priv->perf.initialized) {
+ if (!perf->i915) {
DRM_DEBUG("i915 perf interface not available for this system\n");
return -ENOTSUPP;
}
@@ -2986,124 +3374,128 @@ int i915_perf_open_ioctl(struct drm_device *dev, void *data,
return -EINVAL;
}
- ret = read_properties_unlocked(dev_priv,
+ ret = read_properties_unlocked(perf,
u64_to_user_ptr(param->properties_ptr),
param->num_properties,
&props);
if (ret)
return ret;
- mutex_lock(&dev_priv->perf.lock);
- ret = i915_perf_open_ioctl_locked(dev_priv, param, &props, file);
- mutex_unlock(&dev_priv->perf.lock);
+ mutex_lock(&perf->lock);
+ ret = i915_perf_open_ioctl_locked(perf, param, &props, file);
+ mutex_unlock(&perf->lock);
return ret;
}
/**
* i915_perf_register - exposes i915-perf to userspace
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
*
* In particular OA metric sets are advertised under a sysfs metrics/
* directory allowing userspace to enumerate valid IDs that can be
* used to open an i915-perf stream.
*/
-void i915_perf_register(struct drm_i915_private *dev_priv)
+void i915_perf_register(struct drm_i915_private *i915)
{
+ struct i915_perf *perf = &i915->perf;
int ret;
- if (!dev_priv->perf.initialized)
+ if (!perf->i915)
return;
/* To be sure we're synchronized with an attempted
* i915_perf_open_ioctl(); considering that we register after
* being exposed to userspace.
*/
- mutex_lock(&dev_priv->perf.lock);
+ mutex_lock(&perf->lock);
- dev_priv->perf.metrics_kobj =
+ perf->metrics_kobj =
kobject_create_and_add("metrics",
- &dev_priv->drm.primary->kdev->kobj);
- if (!dev_priv->perf.metrics_kobj)
+ &i915->drm.primary->kdev->kobj);
+ if (!perf->metrics_kobj)
goto exit;
- sysfs_attr_init(&dev_priv->perf.test_config.sysfs_metric_id.attr);
-
- if (INTEL_GEN(dev_priv) >= 11) {
- i915_perf_load_test_config_icl(dev_priv);
- } else if (IS_CANNONLAKE(dev_priv)) {
- i915_perf_load_test_config_cnl(dev_priv);
- } else if (IS_COFFEELAKE(dev_priv)) {
- if (IS_CFL_GT2(dev_priv))
- i915_perf_load_test_config_cflgt2(dev_priv);
- if (IS_CFL_GT3(dev_priv))
- i915_perf_load_test_config_cflgt3(dev_priv);
- } else if (IS_GEMINILAKE(dev_priv)) {
- i915_perf_load_test_config_glk(dev_priv);
- } else if (IS_KABYLAKE(dev_priv)) {
- if (IS_KBL_GT2(dev_priv))
- i915_perf_load_test_config_kblgt2(dev_priv);
- else if (IS_KBL_GT3(dev_priv))
- i915_perf_load_test_config_kblgt3(dev_priv);
- } else if (IS_BROXTON(dev_priv)) {
- i915_perf_load_test_config_bxt(dev_priv);
- } else if (IS_SKYLAKE(dev_priv)) {
- if (IS_SKL_GT2(dev_priv))
- i915_perf_load_test_config_sklgt2(dev_priv);
- else if (IS_SKL_GT3(dev_priv))
- i915_perf_load_test_config_sklgt3(dev_priv);
- else if (IS_SKL_GT4(dev_priv))
- i915_perf_load_test_config_sklgt4(dev_priv);
- } else if (IS_CHERRYVIEW(dev_priv)) {
- i915_perf_load_test_config_chv(dev_priv);
- } else if (IS_BROADWELL(dev_priv)) {
- i915_perf_load_test_config_bdw(dev_priv);
- } else if (IS_HASWELL(dev_priv)) {
- i915_perf_load_test_config_hsw(dev_priv);
-}
-
- if (dev_priv->perf.test_config.id == 0)
+ sysfs_attr_init(&perf->test_config.sysfs_metric_id.attr);
+
+ if (INTEL_GEN(i915) >= 11) {
+ i915_perf_load_test_config_icl(i915);
+ } else if (IS_CANNONLAKE(i915)) {
+ i915_perf_load_test_config_cnl(i915);
+ } else if (IS_COFFEELAKE(i915)) {
+ if (IS_CFL_GT2(i915))
+ i915_perf_load_test_config_cflgt2(i915);
+ if (IS_CFL_GT3(i915))
+ i915_perf_load_test_config_cflgt3(i915);
+ } else if (IS_GEMINILAKE(i915)) {
+ i915_perf_load_test_config_glk(i915);
+ } else if (IS_KABYLAKE(i915)) {
+ if (IS_KBL_GT2(i915))
+ i915_perf_load_test_config_kblgt2(i915);
+ else if (IS_KBL_GT3(i915))
+ i915_perf_load_test_config_kblgt3(i915);
+ } else if (IS_BROXTON(i915)) {
+ i915_perf_load_test_config_bxt(i915);
+ } else if (IS_SKYLAKE(i915)) {
+ if (IS_SKL_GT2(i915))
+ i915_perf_load_test_config_sklgt2(i915);
+ else if (IS_SKL_GT3(i915))
+ i915_perf_load_test_config_sklgt3(i915);
+ else if (IS_SKL_GT4(i915))
+ i915_perf_load_test_config_sklgt4(i915);
+ } else if (IS_CHERRYVIEW(i915)) {
+ i915_perf_load_test_config_chv(i915);
+ } else if (IS_BROADWELL(i915)) {
+ i915_perf_load_test_config_bdw(i915);
+ } else if (IS_HASWELL(i915)) {
+ i915_perf_load_test_config_hsw(i915);
+ }
+
+ if (perf->test_config.id == 0)
goto sysfs_error;
- ret = sysfs_create_group(dev_priv->perf.metrics_kobj,
- &dev_priv->perf.test_config.sysfs_metric);
+ ret = sysfs_create_group(perf->metrics_kobj,
+ &perf->test_config.sysfs_metric);
if (ret)
goto sysfs_error;
- atomic_set(&dev_priv->perf.test_config.ref_count, 1);
+ perf->test_config.perf = perf;
+ kref_init(&perf->test_config.ref);
goto exit;
sysfs_error:
- kobject_put(dev_priv->perf.metrics_kobj);
- dev_priv->perf.metrics_kobj = NULL;
+ kobject_put(perf->metrics_kobj);
+ perf->metrics_kobj = NULL;
exit:
- mutex_unlock(&dev_priv->perf.lock);
+ mutex_unlock(&perf->lock);
}
/**
* i915_perf_unregister - hide i915-perf from userspace
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
*
* i915-perf state cleanup is split up into an 'unregister' and
* 'deinit' phase where the interface is first hidden from
* userspace by i915_perf_unregister() before cleaning up
* remaining state in i915_perf_fini().
*/
-void i915_perf_unregister(struct drm_i915_private *dev_priv)
+void i915_perf_unregister(struct drm_i915_private *i915)
{
- if (!dev_priv->perf.metrics_kobj)
+ struct i915_perf *perf = &i915->perf;
+
+ if (!perf->metrics_kobj)
return;
- sysfs_remove_group(dev_priv->perf.metrics_kobj,
- &dev_priv->perf.test_config.sysfs_metric);
+ sysfs_remove_group(perf->metrics_kobj,
+ &perf->test_config.sysfs_metric);
- kobject_put(dev_priv->perf.metrics_kobj);
- dev_priv->perf.metrics_kobj = NULL;
+ kobject_put(perf->metrics_kobj);
+ perf->metrics_kobj = NULL;
}
-static bool gen8_is_valid_flex_addr(struct drm_i915_private *dev_priv, u32 addr)
+static bool gen8_is_valid_flex_addr(struct i915_perf *perf, u32 addr)
{
static const i915_reg_t flex_eu_regs[] = {
EU_PERF_CNTL0,
@@ -3123,7 +3515,7 @@ static bool gen8_is_valid_flex_addr(struct drm_i915_private *dev_priv, u32 addr)
return false;
}
-static bool gen7_is_valid_b_counter_addr(struct drm_i915_private *dev_priv, u32 addr)
+static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
{
return (addr >= i915_mmio_reg_offset(OASTARTTRIG1) &&
addr <= i915_mmio_reg_offset(OASTARTTRIG8)) ||
@@ -3133,7 +3525,7 @@ static bool gen7_is_valid_b_counter_addr(struct drm_i915_private *dev_priv, u32
addr <= i915_mmio_reg_offset(OACEC7_1));
}
-static bool gen7_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
+static bool gen7_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
{
return addr == i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) ||
(addr >= i915_mmio_reg_offset(MICRO_BP0_0) &&
@@ -3144,34 +3536,34 @@ static bool gen7_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
addr <= i915_mmio_reg_offset(OA_PERFMATRIX_HI));
}
-static bool gen8_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
+static bool gen8_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
{
- return gen7_is_valid_mux_addr(dev_priv, addr) ||
+ return gen7_is_valid_mux_addr(perf, addr) ||
addr == i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) ||
(addr >= i915_mmio_reg_offset(RPM_CONFIG0) &&
addr <= i915_mmio_reg_offset(NOA_CONFIG(8)));
}
-static bool gen10_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
+static bool gen10_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
{
- return gen8_is_valid_mux_addr(dev_priv, addr) ||
+ return gen8_is_valid_mux_addr(perf, addr) ||
addr == i915_mmio_reg_offset(GEN10_NOA_WRITE_HIGH) ||
(addr >= i915_mmio_reg_offset(OA_PERFCNT3_LO) &&
addr <= i915_mmio_reg_offset(OA_PERFCNT4_HI));
}
-static bool hsw_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
+static bool hsw_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
{
- return gen7_is_valid_mux_addr(dev_priv, addr) ||
+ return gen7_is_valid_mux_addr(perf, addr) ||
(addr >= 0x25100 && addr <= 0x2FF90) ||
(addr >= i915_mmio_reg_offset(HSW_MBVID2_NOA0) &&
addr <= i915_mmio_reg_offset(HSW_MBVID2_NOA9)) ||
addr == i915_mmio_reg_offset(HSW_MBVID2_MISR0);
}
-static bool chv_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
+static bool chv_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
{
- return gen7_is_valid_mux_addr(dev_priv, addr) ||
+ return gen7_is_valid_mux_addr(perf, addr) ||
(addr >= 0x182300 && addr <= 0x1823A4);
}
@@ -3194,8 +3586,8 @@ static u32 mask_reg_value(u32 reg, u32 val)
return val;
}
-static struct i915_oa_reg *alloc_oa_regs(struct drm_i915_private *dev_priv,
- bool (*is_valid)(struct drm_i915_private *dev_priv, u32 addr),
+static struct i915_oa_reg *alloc_oa_regs(struct i915_perf *perf,
+ bool (*is_valid)(struct i915_perf *perf, u32 addr),
u32 __user *regs,
u32 n_regs)
{
@@ -3225,7 +3617,7 @@ static struct i915_oa_reg *alloc_oa_regs(struct drm_i915_private *dev_priv,
if (err)
goto addr_err;
- if (!is_valid(dev_priv, addr)) {
+ if (!is_valid(perf, addr)) {
DRM_DEBUG("Invalid oa_reg address: %X\n", addr);
err = -EINVAL;
goto addr_err;
@@ -3258,7 +3650,7 @@ static ssize_t show_dynamic_id(struct device *dev,
return sprintf(buf, "%d\n", oa_config->id);
}
-static int create_dynamic_oa_sysfs_entry(struct drm_i915_private *dev_priv,
+static int create_dynamic_oa_sysfs_entry(struct i915_perf *perf,
struct i915_oa_config *oa_config)
{
sysfs_attr_init(&oa_config->sysfs_metric_id.attr);
@@ -3273,7 +3665,7 @@ static int create_dynamic_oa_sysfs_entry(struct drm_i915_private *dev_priv,
oa_config->sysfs_metric.name = oa_config->uuid;
oa_config->sysfs_metric.attrs = oa_config->attrs;
- return sysfs_create_group(dev_priv->perf.metrics_kobj,
+ return sysfs_create_group(perf->metrics_kobj,
&oa_config->sysfs_metric);
}
@@ -3293,17 +3685,18 @@ static int create_dynamic_oa_sysfs_entry(struct drm_i915_private *dev_priv,
int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
- struct drm_i915_private *dev_priv = dev->dev_private;
+ struct i915_perf *perf = &to_i915(dev)->perf;
struct drm_i915_perf_oa_config *args = data;
struct i915_oa_config *oa_config, *tmp;
+ struct i915_oa_reg *regs;
int err, id;
- if (!dev_priv->perf.initialized) {
+ if (!perf->i915) {
DRM_DEBUG("i915 perf interface not available for this system\n");
return -ENOTSUPP;
}
- if (!dev_priv->perf.metrics_kobj) {
+ if (!perf->metrics_kobj) {
DRM_DEBUG("OA metrics weren't advertised via sysfs\n");
return -EINVAL;
}
@@ -3326,7 +3719,8 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
return -ENOMEM;
}
- atomic_set(&oa_config->ref_count, 1);
+ oa_config->perf = perf;
+ kref_init(&oa_config->ref);
if (!uuid_is_valid(args->uuid)) {
DRM_DEBUG("Invalid uuid format for OA config\n");
@@ -3340,59 +3734,59 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
memcpy(oa_config->uuid, args->uuid, sizeof(args->uuid));
oa_config->mux_regs_len = args->n_mux_regs;
- oa_config->mux_regs =
- alloc_oa_regs(dev_priv,
- dev_priv->perf.ops.is_valid_mux_reg,
- u64_to_user_ptr(args->mux_regs_ptr),
- args->n_mux_regs);
+ regs = alloc_oa_regs(perf,
+ perf->ops.is_valid_mux_reg,
+ u64_to_user_ptr(args->mux_regs_ptr),
+ args->n_mux_regs);
- if (IS_ERR(oa_config->mux_regs)) {
+ if (IS_ERR(regs)) {
DRM_DEBUG("Failed to create OA config for mux_regs\n");
- err = PTR_ERR(oa_config->mux_regs);
+ err = PTR_ERR(regs);
goto reg_err;
}
+ oa_config->mux_regs = regs;
oa_config->b_counter_regs_len = args->n_boolean_regs;
- oa_config->b_counter_regs =
- alloc_oa_regs(dev_priv,
- dev_priv->perf.ops.is_valid_b_counter_reg,
- u64_to_user_ptr(args->boolean_regs_ptr),
- args->n_boolean_regs);
+ regs = alloc_oa_regs(perf,
+ perf->ops.is_valid_b_counter_reg,
+ u64_to_user_ptr(args->boolean_regs_ptr),
+ args->n_boolean_regs);
- if (IS_ERR(oa_config->b_counter_regs)) {
+ if (IS_ERR(regs)) {
DRM_DEBUG("Failed to create OA config for b_counter_regs\n");
- err = PTR_ERR(oa_config->b_counter_regs);
+ err = PTR_ERR(regs);
goto reg_err;
}
+ oa_config->b_counter_regs = regs;
- if (INTEL_GEN(dev_priv) < 8) {
+ if (INTEL_GEN(perf->i915) < 8) {
if (args->n_flex_regs != 0) {
err = -EINVAL;
goto reg_err;
}
} else {
oa_config->flex_regs_len = args->n_flex_regs;
- oa_config->flex_regs =
- alloc_oa_regs(dev_priv,
- dev_priv->perf.ops.is_valid_flex_reg,
- u64_to_user_ptr(args->flex_regs_ptr),
- args->n_flex_regs);
+ regs = alloc_oa_regs(perf,
+ perf->ops.is_valid_flex_reg,
+ u64_to_user_ptr(args->flex_regs_ptr),
+ args->n_flex_regs);
- if (IS_ERR(oa_config->flex_regs)) {
+ if (IS_ERR(regs)) {
DRM_DEBUG("Failed to create OA config for flex_regs\n");
- err = PTR_ERR(oa_config->flex_regs);
+ err = PTR_ERR(regs);
goto reg_err;
}
+ oa_config->flex_regs = regs;
}
- err = mutex_lock_interruptible(&dev_priv->perf.metrics_lock);
+ err = mutex_lock_interruptible(&perf->metrics_lock);
if (err)
goto reg_err;
/* We shouldn't have too many configs, so this iteration shouldn't be
* too costly.
*/
- idr_for_each_entry(&dev_priv->perf.metrics_idr, tmp, id) {
+ idr_for_each_entry(&perf->metrics_idr, tmp, id) {
if (!strcmp(tmp->uuid, oa_config->uuid)) {
DRM_DEBUG("OA config already exists with this uuid\n");
err = -EADDRINUSE;
@@ -3400,14 +3794,14 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
}
}
- err = create_dynamic_oa_sysfs_entry(dev_priv, oa_config);
+ err = create_dynamic_oa_sysfs_entry(perf, oa_config);
if (err) {
DRM_DEBUG("Failed to create sysfs entry for OA config\n");
goto sysfs_err;
}
/* Config id 0 is invalid, id 1 for kernel stored test config. */
- oa_config->id = idr_alloc(&dev_priv->perf.metrics_idr,
+ oa_config->id = idr_alloc(&perf->metrics_idr,
oa_config, 2,
0, GFP_KERNEL);
if (oa_config->id < 0) {
@@ -3416,16 +3810,16 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
goto sysfs_err;
}
- mutex_unlock(&dev_priv->perf.metrics_lock);
+ mutex_unlock(&perf->metrics_lock);
DRM_DEBUG("Added config %s id=%i\n", oa_config->uuid, oa_config->id);
return oa_config->id;
sysfs_err:
- mutex_unlock(&dev_priv->perf.metrics_lock);
+ mutex_unlock(&perf->metrics_lock);
reg_err:
- put_oa_config(dev_priv, oa_config);
+ i915_oa_config_put(oa_config);
DRM_DEBUG("Failed to add new OA config\n");
return err;
}
@@ -3444,12 +3838,12 @@ reg_err:
int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
- struct drm_i915_private *dev_priv = dev->dev_private;
+ struct i915_perf *perf = &to_i915(dev)->perf;
u64 *arg = data;
struct i915_oa_config *oa_config;
int ret;
- if (!dev_priv->perf.initialized) {
+ if (!perf->i915) {
DRM_DEBUG("i915 perf interface not available for this system\n");
return -ENOTSUPP;
}
@@ -3459,31 +3853,33 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
return -EACCES;
}
- ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock);
+ ret = mutex_lock_interruptible(&perf->metrics_lock);
if (ret)
- goto lock_err;
+ return ret;
- oa_config = idr_find(&dev_priv->perf.metrics_idr, *arg);
+ oa_config = idr_find(&perf->metrics_idr, *arg);
if (!oa_config) {
DRM_DEBUG("Failed to remove unknown OA config\n");
ret = -ENOENT;
- goto config_err;
+ goto err_unlock;
}
GEM_BUG_ON(*arg != oa_config->id);
- sysfs_remove_group(dev_priv->perf.metrics_kobj,
- &oa_config->sysfs_metric);
+ sysfs_remove_group(perf->metrics_kobj, &oa_config->sysfs_metric);
- idr_remove(&dev_priv->perf.metrics_idr, *arg);
+ idr_remove(&perf->metrics_idr, *arg);
+
+ mutex_unlock(&perf->metrics_lock);
DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id);
- put_oa_config(dev_priv, oa_config);
+ i915_oa_config_put(oa_config);
-config_err:
- mutex_unlock(&dev_priv->perf.metrics_lock);
-lock_err:
+ return 0;
+
+err_unlock:
+ mutex_unlock(&perf->metrics_lock);
return ret;
}
@@ -3531,103 +3927,103 @@ static struct ctl_table dev_root[] = {
/**
* i915_perf_init - initialize i915-perf state on module load
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
*
* Initializes i915-perf state without exposing anything to userspace.
*
* Note: i915-perf initialization is split into an 'init' and 'register'
* phase with the i915_perf_register() exposing state to userspace.
*/
-void i915_perf_init(struct drm_i915_private *dev_priv)
-{
- if (IS_HASWELL(dev_priv)) {
- dev_priv->perf.ops.is_valid_b_counter_reg =
- gen7_is_valid_b_counter_addr;
- dev_priv->perf.ops.is_valid_mux_reg =
- hsw_is_valid_mux_addr;
- dev_priv->perf.ops.is_valid_flex_reg = NULL;
- dev_priv->perf.ops.enable_metric_set = hsw_enable_metric_set;
- dev_priv->perf.ops.disable_metric_set = hsw_disable_metric_set;
- dev_priv->perf.ops.oa_enable = gen7_oa_enable;
- dev_priv->perf.ops.oa_disable = gen7_oa_disable;
- dev_priv->perf.ops.read = gen7_oa_read;
- dev_priv->perf.ops.oa_hw_tail_read =
- gen7_oa_hw_tail_read;
-
- dev_priv->perf.oa_formats = hsw_oa_formats;
- } else if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
+void i915_perf_init(struct drm_i915_private *i915)
+{
+ struct i915_perf *perf = &i915->perf;
+
+ /* XXX const struct i915_perf_ops! */
+
+ if (IS_HASWELL(i915)) {
+ perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr;
+ perf->ops.is_valid_mux_reg = hsw_is_valid_mux_addr;
+ perf->ops.is_valid_flex_reg = NULL;
+ perf->ops.enable_metric_set = hsw_enable_metric_set;
+ perf->ops.disable_metric_set = hsw_disable_metric_set;
+ perf->ops.oa_enable = gen7_oa_enable;
+ perf->ops.oa_disable = gen7_oa_disable;
+ perf->ops.read = gen7_oa_read;
+ perf->ops.oa_hw_tail_read = gen7_oa_hw_tail_read;
+
+ perf->oa_formats = hsw_oa_formats;
+ } else if (HAS_LOGICAL_RING_CONTEXTS(i915)) {
/* Note: that although we could theoretically also support the
* legacy ringbuffer mode on BDW (and earlier iterations of
* this driver, before upstreaming did this) it didn't seem
* worth the complexity to maintain now that BDW+ enable
* execlist mode by default.
*/
- dev_priv->perf.oa_formats = gen8_plus_oa_formats;
+ perf->oa_formats = gen8_plus_oa_formats;
- dev_priv->perf.ops.oa_enable = gen8_oa_enable;
- dev_priv->perf.ops.oa_disable = gen8_oa_disable;
- dev_priv->perf.ops.read = gen8_oa_read;
- dev_priv->perf.ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
+ perf->ops.oa_enable = gen8_oa_enable;
+ perf->ops.oa_disable = gen8_oa_disable;
+ perf->ops.read = gen8_oa_read;
+ perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
- if (IS_GEN_RANGE(dev_priv, 8, 9)) {
- dev_priv->perf.ops.is_valid_b_counter_reg =
+ if (IS_GEN_RANGE(i915, 8, 9)) {
+ perf->ops.is_valid_b_counter_reg =
gen7_is_valid_b_counter_addr;
- dev_priv->perf.ops.is_valid_mux_reg =
+ perf->ops.is_valid_mux_reg =
gen8_is_valid_mux_addr;
- dev_priv->perf.ops.is_valid_flex_reg =
+ perf->ops.is_valid_flex_reg =
gen8_is_valid_flex_addr;
- if (IS_CHERRYVIEW(dev_priv)) {
- dev_priv->perf.ops.is_valid_mux_reg =
+ if (IS_CHERRYVIEW(i915)) {
+ perf->ops.is_valid_mux_reg =
chv_is_valid_mux_addr;
}
- dev_priv->perf.ops.enable_metric_set = gen8_enable_metric_set;
- dev_priv->perf.ops.disable_metric_set = gen8_disable_metric_set;
+ perf->ops.enable_metric_set = gen8_enable_metric_set;
+ perf->ops.disable_metric_set = gen8_disable_metric_set;
- if (IS_GEN(dev_priv, 8)) {
- dev_priv->perf.ctx_oactxctrl_offset = 0x120;
- dev_priv->perf.ctx_flexeu0_offset = 0x2ce;
+ if (IS_GEN(i915, 8)) {
+ perf->ctx_oactxctrl_offset = 0x120;
+ perf->ctx_flexeu0_offset = 0x2ce;
- dev_priv->perf.gen8_valid_ctx_bit = BIT(25);
+ perf->gen8_valid_ctx_bit = BIT(25);
} else {
- dev_priv->perf.ctx_oactxctrl_offset = 0x128;
- dev_priv->perf.ctx_flexeu0_offset = 0x3de;
+ perf->ctx_oactxctrl_offset = 0x128;
+ perf->ctx_flexeu0_offset = 0x3de;
- dev_priv->perf.gen8_valid_ctx_bit = BIT(16);
+ perf->gen8_valid_ctx_bit = BIT(16);
}
- } else if (IS_GEN_RANGE(dev_priv, 10, 11)) {
- dev_priv->perf.ops.is_valid_b_counter_reg =
+ } else if (IS_GEN_RANGE(i915, 10, 11)) {
+ perf->ops.is_valid_b_counter_reg =
gen7_is_valid_b_counter_addr;
- dev_priv->perf.ops.is_valid_mux_reg =
+ perf->ops.is_valid_mux_reg =
gen10_is_valid_mux_addr;
- dev_priv->perf.ops.is_valid_flex_reg =
+ perf->ops.is_valid_flex_reg =
gen8_is_valid_flex_addr;
- dev_priv->perf.ops.enable_metric_set = gen8_enable_metric_set;
- dev_priv->perf.ops.disable_metric_set = gen10_disable_metric_set;
+ perf->ops.enable_metric_set = gen8_enable_metric_set;
+ perf->ops.disable_metric_set = gen10_disable_metric_set;
- if (IS_GEN(dev_priv, 10)) {
- dev_priv->perf.ctx_oactxctrl_offset = 0x128;
- dev_priv->perf.ctx_flexeu0_offset = 0x3de;
+ if (IS_GEN(i915, 10)) {
+ perf->ctx_oactxctrl_offset = 0x128;
+ perf->ctx_flexeu0_offset = 0x3de;
} else {
- dev_priv->perf.ctx_oactxctrl_offset = 0x124;
- dev_priv->perf.ctx_flexeu0_offset = 0x78e;
+ perf->ctx_oactxctrl_offset = 0x124;
+ perf->ctx_flexeu0_offset = 0x78e;
}
- dev_priv->perf.gen8_valid_ctx_bit = BIT(16);
+ perf->gen8_valid_ctx_bit = BIT(16);
}
}
- if (dev_priv->perf.ops.enable_metric_set) {
- INIT_LIST_HEAD(&dev_priv->perf.streams);
- mutex_init(&dev_priv->perf.lock);
+ if (perf->ops.enable_metric_set) {
+ mutex_init(&perf->lock);
oa_sample_rate_hard_limit = 1000 *
- (RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz / 2);
- dev_priv->perf.sysctl_header = register_sysctl_table(dev_root);
+ (RUNTIME_INFO(i915)->cs_timestamp_frequency_khz / 2);
+ perf->sysctl_header = register_sysctl_table(dev_root);
- mutex_init(&dev_priv->perf.metrics_lock);
- idr_init(&dev_priv->perf.metrics_idr);
+ mutex_init(&perf->metrics_lock);
+ idr_init(&perf->metrics_idr);
/* We set up some ratelimit state to potentially throttle any
* _NOTES about spurious, invalid OA reports which we don't
@@ -3639,44 +4035,70 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
*
* Using the same limiting factors as printk_ratelimit()
*/
- ratelimit_state_init(&dev_priv->perf.spurious_report_rs,
- 5 * HZ, 10);
+ ratelimit_state_init(&perf->spurious_report_rs, 5 * HZ, 10);
/* Since we use a DRM_NOTE for spurious reports it would be
* inconsistent to let __ratelimit() automatically print a
* warning for throttling.
*/
- ratelimit_set_flags(&dev_priv->perf.spurious_report_rs,
+ ratelimit_set_flags(&perf->spurious_report_rs,
RATELIMIT_MSG_ON_RELEASE);
- dev_priv->perf.initialized = true;
+ atomic64_set(&perf->noa_programming_delay,
+ 500 * 1000 /* 500us */);
+
+ perf->i915 = i915;
}
}
static int destroy_config(int id, void *p, void *data)
{
- struct drm_i915_private *dev_priv = data;
- struct i915_oa_config *oa_config = p;
-
- put_oa_config(dev_priv, oa_config);
-
+ i915_oa_config_put(p);
return 0;
}
/**
* i915_perf_fini - Counter part to i915_perf_init()
- * @dev_priv: i915 device instance
+ * @i915: i915 device instance
*/
-void i915_perf_fini(struct drm_i915_private *dev_priv)
+void i915_perf_fini(struct drm_i915_private *i915)
{
- if (!dev_priv->perf.initialized)
+ struct i915_perf *perf = &i915->perf;
+
+ if (!perf->i915)
return;
- idr_for_each(&dev_priv->perf.metrics_idr, destroy_config, dev_priv);
- idr_destroy(&dev_priv->perf.metrics_idr);
+ idr_for_each(&perf->metrics_idr, destroy_config, perf);
+ idr_destroy(&perf->metrics_idr);
- unregister_sysctl_table(dev_priv->perf.sysctl_header);
+ unregister_sysctl_table(perf->sysctl_header);
- memset(&dev_priv->perf.ops, 0, sizeof(dev_priv->perf.ops));
+ memset(&perf->ops, 0, sizeof(perf->ops));
+ perf->i915 = NULL;
+}
- dev_priv->perf.initialized = false;
+/**
+ * i915_perf_ioctl_version - Version of the i915-perf subsystem
+ *
+ * This version number is used by userspace to detect available features.
+ */
+int i915_perf_ioctl_version(void)
+{
+ /*
+ * 1: Initial version
+ * I915_PERF_IOCTL_ENABLE
+ * I915_PERF_IOCTL_DISABLE
+ *
+ * 2: Added runtime modification of OA config.
+ * I915_PERF_IOCTL_CONFIG
+ *
+ * 3: Add DRM_I915_PERF_PROP_HOLD_PREEMPTION parameter to hold
+ * preemption on a particular context so that performance data is
+ * accessible from a delta of MI_RPC reports without looking at the
+ * OA buffer.
+ */
+ return 3;
}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_perf.c"
+#endif
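
Taken together with the CONFIG ioctl, the new uAPI lets a tool bind a stream
to one context and keep that context from being preempted while queries are in
flight. A hypothetical userspace sketch of such an open (structure and
property names follow the checks added above; the i915_drm.h include path may
vary with the libdrm installation, and the caller needs CAP_SYS_ADMIN unless
perf_stream_paranoid is 0):

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <drm/i915_drm.h>

	static int open_perf_stream(int drm_fd, uint32_t ctx_id,
				    uint64_t metrics_set, uint64_t oa_format,
				    uint64_t oa_exponent)
	{
		uint64_t properties[] = {
			DRM_I915_PERF_PROP_CTX_HANDLE, ctx_id,
			DRM_I915_PERF_PROP_SAMPLE_OA, 1,
			DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set,
			DRM_I915_PERF_PROP_OA_FORMAT, oa_format,
			DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent,
			DRM_I915_PERF_PROP_HOLD_PREEMPTION, 1,
		};
		struct drm_i915_perf_open_param param = {
			.flags = I915_PERF_FLAG_FD_CLOEXEC |
				 I915_PERF_FLAG_FD_NONBLOCK,
			.num_properties = sizeof(properties) /
					  (2 * sizeof(uint64_t)),
			.properties_ptr = (uintptr_t)properties,
		};

		/* Returns the stream fd on success, negative errno on error. */
		return ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
	}
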
diff --git a/drivers/gpu/drm/i915/i915_perf.h b/drivers/gpu/drm/i915/i915_perf.h
index f4fb311184b1..4ceebce72060 100644
--- a/drivers/gpu/drm/i915/i915_perf.h
+++ b/drivers/gpu/drm/i915/i915_perf.h
@@ -6,11 +6,15 @@
#ifndef __I915_PERF_H__
#define __I915_PERF_H__
+#include <linux/kref.h>
#include <linux/types.h>
+#include "i915_perf_types.h"
+
struct drm_device;
struct drm_file;
struct drm_i915_private;
+struct i915_oa_config;
struct intel_context;
struct intel_engine_cs;
@@ -18,6 +22,7 @@ void i915_perf_init(struct drm_i915_private *i915);
void i915_perf_fini(struct drm_i915_private *i915);
void i915_perf_register(struct drm_i915_private *i915);
void i915_perf_unregister(struct drm_i915_private *i915);
+int i915_perf_ioctl_version(void);
int i915_perf_open_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
@@ -25,7 +30,29 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
+
void i915_oa_init_reg_state(const struct intel_context *ce,
const struct intel_engine_cs *engine);
+struct i915_oa_config *
+i915_perf_get_oa_config(struct i915_perf *perf, int metrics_set);
+
+static inline struct i915_oa_config *
+i915_oa_config_get(struct i915_oa_config *oa_config)
+{
+ if (kref_get_unless_zero(&oa_config->ref))
+ return oa_config;
+ else
+ return NULL;
+}
+
+void i915_oa_config_release(struct kref *ref);
+static inline void i915_oa_config_put(struct i915_oa_config *oa_config)
+{
+ if (!oa_config)
+ return;
+
+ kref_put(&oa_config->ref, i915_oa_config_release);
+}
+
#endif /* __I915_PERF_H__ */
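
The inline get/put helpers above replace the old dev_priv-based put_oa_config(); callers are expected to pair them as in this hypothetical sketch (i915_perf_get_oa_config() hands back a reference, which the query code later in this series also relies on).

/* Hypothetical caller, for illustration only. */
static int example_use_oa_config(struct i915_perf *perf, int metrics_set)
{
        struct i915_oa_config *oa_config;

        oa_config = i915_perf_get_oa_config(perf, metrics_set);
        if (!oa_config)
                return -ENOENT;

        /* ... read or program the configuration ... */

        i915_oa_config_put(oa_config);  /* drop the acquired reference */
        return 0;
}
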
diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h
new file mode 100644
index 000000000000..a1f733fc905a
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_perf_types.h
@@ -0,0 +1,406 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef _I915_PERF_TYPES_H_
+#define _I915_PERF_TYPES_H_
+
+#include <linux/atomic.h>
+#include <linux/device.h>
+#include <linux/hrtimer.h>
+#include <linux/llist.h>
+#include <linux/poll.h>
+#include <linux/sysfs.h>
+#include <linux/types.h>
+#include <linux/uuid.h>
+#include <linux/wait.h>
+
+#include "i915_reg.h"
+#include "intel_wakeref.h"
+
+struct drm_i915_private;
+struct file;
+struct i915_gem_context;
+struct i915_perf;
+struct i915_vma;
+struct intel_context;
+struct intel_engine_cs;
+
+struct i915_oa_format {
+ u32 format;
+ int size;
+};
+
+struct i915_oa_reg {
+ i915_reg_t addr;
+ u32 value;
+};
+
+struct i915_oa_config {
+ struct i915_perf *perf;
+
+ char uuid[UUID_STRING_LEN + 1];
+ int id;
+
+ const struct i915_oa_reg *mux_regs;
+ u32 mux_regs_len;
+ const struct i915_oa_reg *b_counter_regs;
+ u32 b_counter_regs_len;
+ const struct i915_oa_reg *flex_regs;
+ u32 flex_regs_len;
+
+ struct attribute_group sysfs_metric;
+ struct attribute *attrs[2];
+ struct device_attribute sysfs_metric_id;
+
+ struct kref ref;
+ struct rcu_head rcu;
+};
+
+struct i915_perf_stream;
+
+/**
+ * struct i915_perf_stream_ops - the OPs to support a specific stream type
+ */
+struct i915_perf_stream_ops {
+ /**
+ * @enable: Enables the collection of HW samples, either in response to
+ * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened
+ * without `I915_PERF_FLAG_DISABLED`.
+ */
+ void (*enable)(struct i915_perf_stream *stream);
+
+ /**
+ * @disable: Disables the collection of HW samples, either in response
+ * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying
+ * the stream.
+ */
+ void (*disable)(struct i915_perf_stream *stream);
+
+ /**
+ * @poll_wait: Call poll_wait, passing a wait queue that will be woken
+ * once there is something ready to read() for the stream
+ */
+ void (*poll_wait)(struct i915_perf_stream *stream,
+ struct file *file,
+ poll_table *wait);
+
+ /**
+ * @wait_unlocked: For handling a blocking read, wait until there is
+ * something ready to read() for the stream. E.g. wait on the same
+ * wait queue that would be passed to poll_wait().
+ */
+ int (*wait_unlocked)(struct i915_perf_stream *stream);
+
+ /**
+ * @read: Copy buffered metrics as records to userspace
+ * **buf**: the userspace, destination buffer
+ * **count**: the number of bytes to copy, requested by userspace
+ * **offset**: zero at the start of the read, updated as the read
+ * proceeds, it represents how many bytes have been copied so far and
+ * the buffer offset for copying the next record.
+ *
+ * Copy as many buffered i915 perf samples and records for this stream
+ * to userspace as will fit in the given buffer.
+ *
+ * Only write complete records; returning -%ENOSPC if there isn't room
+ * for a complete record.
+ *
+ * Return any error condition that results in a short read such as
+ * -%ENOSPC or -%EFAULT, even though these may be squashed before
+ * returning to userspace.
+ */
+ int (*read)(struct i915_perf_stream *stream,
+ char __user *buf,
+ size_t count,
+ size_t *offset);
+
+ /**
+ * @destroy: Cleanup any stream specific resources.
+ *
+ * The stream will always be disabled before this is called.
+ */
+ void (*destroy)(struct i915_perf_stream *stream);
+};
+
+/**
+ * struct i915_perf_stream - state for a single open stream FD
+ */
+struct i915_perf_stream {
+ /**
+ * @perf: i915_perf backpointer
+ */
+ struct i915_perf *perf;
+
+ /**
+ * @uncore: mmio access path
+ */
+ struct intel_uncore *uncore;
+
+ /**
+ * @engine: Engine associated with this performance stream.
+ */
+ struct intel_engine_cs *engine;
+
+ /**
+ * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*`
+ * properties given when opening a stream, representing the contents
+ * of a single sample as read() by userspace.
+ */
+ u32 sample_flags;
+
+ /**
+ * @sample_size: Considering the configured contents of a sample
+ * combined with the required header size, this is the total size
+ * of a single sample record.
+ */
+ int sample_size;
+
+ /**
+ * @ctx: %NULL if measuring system-wide across all contexts or a
+ * specific context that is being monitored.
+ */
+ struct i915_gem_context *ctx;
+
+ /**
+ * @enabled: Whether the stream is currently enabled, considering
+ * whether the stream was opened in a disabled state and based
+ * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls.
+ */
+ bool enabled;
+
+ /**
+ * @hold_preemption: Whether preemption is put on hold for command
+ * submissions done on the @ctx. This is useful for some drivers that
+ * cannot easily post-process the OA buffer contents to subtract the
+ * delta of performance counters not associated with @ctx.
+ */
+ bool hold_preemption;
+
+ /**
+ * @ops: The callbacks providing the implementation of this specific
+ * type of configured stream.
+ */
+ const struct i915_perf_stream_ops *ops;
+
+ /**
+ * @oa_config: The OA configuration used by the stream.
+ */
+ struct i915_oa_config *oa_config;
+
+ /**
+ * @oa_config_bos: A list of struct i915_oa_config_bo allocated lazily
+ * each time @oa_config changes.
+ */
+ struct llist_head oa_config_bos;
+
+ /**
+ * @pinned_ctx: The OA context specific information.
+ */
+ struct intel_context *pinned_ctx;
+ u32 specific_ctx_id;
+ u32 specific_ctx_id_mask;
+
+ struct hrtimer poll_check_timer;
+ wait_queue_head_t poll_wq;
+ bool pollin;
+
+ bool periodic;
+ int period_exponent;
+
+ /**
+ * @oa_buffer: State of the OA buffer.
+ */
+ struct {
+ struct i915_vma *vma;
+ u8 *vaddr;
+ u32 last_ctx_id;
+ int format;
+ int format_size;
+ int size_exponent;
+
+ /**
+ * @ptr_lock: Locks reads and writes to all head/tail state
+ *
+ * Consider: the head and tail pointer state needs to be read
+ * consistently from a hrtimer callback (atomic context) and
+ * read() fop (user context) with tail pointer updates happening
+ * in atomic context and head updates in user context and the
+ * (unlikely) possibility of read() errors needing to reset all
+ * head/tail state.
+ *
+ * Note: Contention/performance aren't currently a significant
+ * concern here considering the relatively low frequency of
+ * hrtimer callbacks (5ms period) and that reads typically only
+ * happen in response to a hrtimer event and likely complete
+ * before the next callback.
+ *
+ * Note: This lock is not held *while* reading and copying data
+ * to userspace so the value of head observed in hrtimer
+ * callbacks won't represent any partial consumption of data.
+ */
+ spinlock_t ptr_lock;
+
+ /**
+ * @tails: One 'aging' tail pointer and one 'aged' tail pointer ready to
+ * be used for reading.
+ *
+ * Initial values of 0xffffffff are invalid and imply that an
+ * update is required (and should be ignored by an attempted
+ * read)
+ */
+ struct {
+ u32 offset;
+ } tails[2];
+
+ /**
+ * @aged_tail_idx: Index for the aged tail ready to read() data up to.
+ */
+ unsigned int aged_tail_idx;
+
+ /**
+ * @aging_timestamp: A monotonic timestamp for when the current aging tail pointer
+ * was read; used to determine when it is old enough to trust.
+ */
+ u64 aging_timestamp;
+
+ /**
+ * @head: Although we can always read back the head pointer register,
+ * we prefer to avoid trusting the HW state, just to avoid any
+ * risk that some hardware condition could somehow bump the
+ * head pointer unpredictably and cause us to forward the wrong
+ * OA buffer data to userspace.
+ */
+ u32 head;
+ } oa_buffer;
+
+ /**
+ * A batch buffer doing a wait on the GPU for the NOA logic to be
+ * reprogrammed.
+ */
+ struct i915_vma *noa_wait;
+};
+
+/**
+ * struct i915_oa_ops - Gen specific implementation of an OA unit stream
+ */
+struct i915_oa_ops {
+ /**
+ * @is_valid_b_counter_reg: Validates register's address for
+ * programming boolean counters for a particular platform.
+ */
+ bool (*is_valid_b_counter_reg)(struct i915_perf *perf, u32 addr);
+
+ /**
+ * @is_valid_mux_reg: Validates register's address for programming mux
+ * for a particular platform.
+ */
+ bool (*is_valid_mux_reg)(struct i915_perf *perf, u32 addr);
+
+ /**
+ * @is_valid_flex_reg: Validates register's address for programming
+ * flex EU filtering for a particular platform.
+ */
+ bool (*is_valid_flex_reg)(struct i915_perf *perf, u32 addr);
+
+ /**
+ * @enable_metric_set: Selects and applies any MUX configuration to set
+ * up the Boolean and Custom (B/C) counters that are part of the
+ * counter reports being sampled. May apply system constraints such as
+ * disabling EU clock gating as required.
+ */
+ int (*enable_metric_set)(struct i915_perf_stream *stream);
+
+ /**
+ * @disable_metric_set: Remove system constraints associated with using
+ * the OA unit.
+ */
+ void (*disable_metric_set)(struct i915_perf_stream *stream);
+
+ /**
+ * @oa_enable: Enable periodic sampling
+ */
+ void (*oa_enable)(struct i915_perf_stream *stream);
+
+ /**
+ * @oa_disable: Disable periodic sampling
+ */
+ void (*oa_disable)(struct i915_perf_stream *stream);
+
+ /**
+ * @read: Copy data from the circular OA buffer into a given userspace
+ * buffer.
+ */
+ int (*read)(struct i915_perf_stream *stream,
+ char __user *buf,
+ size_t count,
+ size_t *offset);
+
+ /**
+ * @oa_hw_tail_read: read the OA tail pointer register
+ *
+ * In particular this enables us to share all the fiddly code for
+ * handling the OA unit tail pointer race that affects multiple
+ * generations.
+ */
+ u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream);
+};
+
+struct i915_perf {
+ struct drm_i915_private *i915;
+
+ struct kobject *metrics_kobj;
+ struct ctl_table_header *sysctl_header;
+
+ /*
+ * Lock associated with adding/modifying/removing OA configs
+ * in perf->metrics_idr.
+ */
+ struct mutex metrics_lock;
+
+ /*
+ * List of dynamic configurations (struct i915_oa_config), you
+ * need to hold perf->metrics_lock to access it.
+ */
+ struct idr metrics_idr;
+
+ /*
+ * Lock associated with anything below within this structure
+ * except exclusive_stream.
+ */
+ struct mutex lock;
+
+ /*
+ * The stream currently using the OA unit. If accessed
+ * outside a syscall associated to its file
+ * descriptor.
+ */
+ struct i915_perf_stream *exclusive_stream;
+
+ /**
+ * For rate limiting any notifications of spurious
+ * invalid OA reports
+ */
+ struct ratelimit_state spurious_report_rs;
+
+ struct i915_oa_config test_config;
+
+ u32 gen7_latched_oastatus1;
+ u32 ctx_oactxctrl_offset;
+ u32 ctx_flexeu0_offset;
+
+ /**
+ * The RPT_ID/reason field for Gen8+ includes a bit
+ * to determine if the CTX ID in the report is valid
+ * but the specific bit differs between Gen 8 and 9
+ */
+ u32 gen8_valid_ctx_bit;
+
+ struct i915_oa_ops ops;
+ const struct i915_oa_format *oa_formats;
+
+ atomic64_t noa_programming_delay;
+};
+
+#endif /* _I915_PERF_TYPES_H_ */
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index d0508719492e..85912917c062 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -301,7 +301,7 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns)
if ((i915->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
return;
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
struct intel_engine_pmu *pmu = &engine->pmu;
unsigned long flags;
bool busy;
@@ -1080,10 +1080,21 @@ static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu)
cpuhp_remove_multi_state(cpuhp_slot);
}
+static bool is_igp(struct drm_i915_private *i915)
+{
+ struct pci_dev *pdev = i915->drm.pdev;
+
+ /* IGP is 0000:00:02.0 */
+ return pci_domain_nr(pdev->bus) == 0 &&
+ pdev->bus->number == 0 &&
+ PCI_SLOT(pdev->devfn) == 2 &&
+ PCI_FUNC(pdev->devfn) == 0;
+}
+
void i915_pmu_register(struct drm_i915_private *i915)
{
struct i915_pmu *pmu = &i915->pmu;
- int ret;
+ int ret = -ENOMEM;
if (INTEL_GEN(i915) <= 2) {
dev_info(i915->drm.dev, "PMU not supported for this GPU.");
@@ -1091,10 +1102,8 @@ void i915_pmu_register(struct drm_i915_private *i915)
}
i915_pmu_events_attr_group.attrs = create_event_attributes(pmu);
- if (!i915_pmu_events_attr_group.attrs) {
- ret = -ENOMEM;
+ if (!i915_pmu_events_attr_group.attrs)
goto err;
- }
pmu->base.attr_groups = i915_pmu_attr_groups;
pmu->base.task_ctx_nr = perf_invalid_context;
@@ -1110,10 +1119,19 @@ void i915_pmu_register(struct drm_i915_private *i915)
hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
pmu->timer.function = i915_sample;
- ret = perf_pmu_register(&pmu->base, "i915", -1);
- if (ret)
+ if (!is_igp(i915))
+ pmu->name = kasprintf(GFP_KERNEL,
+ "i915-%s",
+ dev_name(i915->drm.dev));
+ else
+ pmu->name = "i915";
+ if (!pmu->name)
goto err;
+ ret = perf_pmu_register(&pmu->base, pmu->name, -1);
+ if (ret)
+ goto err_name;
+
ret = i915_pmu_register_cpuhp_state(pmu);
if (ret)
goto err_unreg;
@@ -1122,6 +1140,9 @@ void i915_pmu_register(struct drm_i915_private *i915)
err_unreg:
perf_pmu_unregister(&pmu->base);
+err_name:
+ if (!is_igp(i915))
+ kfree(pmu->name);
err:
pmu->base.event_init = NULL;
free_event_attributes(pmu);
@@ -1143,5 +1164,7 @@ void i915_pmu_unregister(struct drm_i915_private *i915)
perf_pmu_unregister(&pmu->base);
pmu->base.event_init = NULL;
+ if (!is_igp(i915))
+ kfree(pmu->name);
free_event_attributes(pmu);
}
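
One practical consequence of the naming change above: only the integrated GPU at 0000:00:02.0 keeps the bare "i915" PMU; every other device registers as "i915-<pci address>". A hedged sketch of what that means for tools that look the PMU up under the standard event_source sysfs path:

#include <stdio.h>

/* Sketch only: the expected sysfs path for the i915 PMU after this
 * change. perf_pmu_register() exposes PMUs under
 * /sys/bus/event_source/devices/<name>; the pci_addr form (e.g.
 * "0000:03:00.0") is what dev_name() yields for a PCI device.
 */
static void i915_pmu_sysfs_path(char *buf, size_t len, const char *pci_addr)
{
        if (pci_addr)   /* non-integrated device */
                snprintf(buf, len,
                         "/sys/bus/event_source/devices/i915-%s", pci_addr);
        else            /* integrated graphics at 0000:00:02.0 */
                snprintf(buf, len, "/sys/bus/event_source/devices/i915");
}
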
diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
index 067dbbf3bdff..bf52e3983631 100644
--- a/drivers/gpu/drm/i915/i915_pmu.h
+++ b/drivers/gpu/drm/i915/i915_pmu.h
@@ -47,6 +47,10 @@ struct i915_pmu {
*/
struct pmu base;
/**
+ * @name: Name as registered with perf core.
+ */
+ const char *name;
+ /**
* @lock: Lock protecting enable mask and ref count handling.
*/
spinlock_t lock;
diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c
index abac5042da2b..c27cfef9281c 100644
--- a/drivers/gpu/drm/i915/i915_query.c
+++ b/drivers/gpu/drm/i915/i915_query.c
@@ -7,6 +7,7 @@
#include <linux/nospec.h>
#include "i915_drv.h"
+#include "i915_perf.h"
#include "i915_query.h"
#include <uapi/drm/i915_drm.h>
@@ -140,10 +141,305 @@ query_engine_info(struct drm_i915_private *i915,
return len;
}
+static int can_copy_perf_config_registers_or_number(u32 user_n_regs,
+ u64 user_regs_ptr,
+ u32 kernel_n_regs)
+{
+ /*
+ * We'll just put the number of registers, and won't copy the
+ * registers.
+ */
+ if (user_n_regs == 0)
+ return 0;
+
+ if (user_n_regs < kernel_n_regs)
+ return -EINVAL;
+
+ if (!access_ok(u64_to_user_ptr(user_regs_ptr),
+ 2 * sizeof(u32) * kernel_n_regs))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int copy_perf_config_registers_or_number(const struct i915_oa_reg *kernel_regs,
+ u32 kernel_n_regs,
+ u64 user_regs_ptr,
+ u32 *user_n_regs)
+{
+ u32 r;
+
+ if (*user_n_regs == 0) {
+ *user_n_regs = kernel_n_regs;
+ return 0;
+ }
+
+ *user_n_regs = kernel_n_regs;
+
+ for (r = 0; r < kernel_n_regs; r++) {
+ u32 __user *user_reg_ptr =
+ u64_to_user_ptr(user_regs_ptr + sizeof(u32) * r * 2);
+ u32 __user *user_val_ptr =
+ u64_to_user_ptr(user_regs_ptr + sizeof(u32) * r * 2 +
+ sizeof(u32));
+ int ret;
+
+ ret = __put_user(i915_mmio_reg_offset(kernel_regs[r].addr),
+ user_reg_ptr);
+ if (ret)
+ return -EFAULT;
+
+ ret = __put_user(kernel_regs[r].value, user_val_ptr);
+ if (ret)
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+static int query_perf_config_data(struct drm_i915_private *i915,
+ struct drm_i915_query_item *query_item,
+ bool use_uuid)
+{
+ struct drm_i915_query_perf_config __user *user_query_config_ptr =
+ u64_to_user_ptr(query_item->data_ptr);
+ struct drm_i915_perf_oa_config __user *user_config_ptr =
+ u64_to_user_ptr(query_item->data_ptr +
+ sizeof(struct drm_i915_query_perf_config));
+ struct drm_i915_perf_oa_config user_config;
+ struct i915_perf *perf = &i915->perf;
+ struct i915_oa_config *oa_config;
+ char uuid[UUID_STRING_LEN + 1];
+ u64 config_id;
+ u32 flags, total_size;
+ int ret;
+
+ if (!perf->i915)
+ return -ENODEV;
+
+ total_size =
+ sizeof(struct drm_i915_query_perf_config) +
+ sizeof(struct drm_i915_perf_oa_config);
+
+ if (query_item->length == 0)
+ return total_size;
+
+ if (query_item->length < total_size) {
+ DRM_DEBUG("Invalid query config data item size=%u expected=%u\n",
+ query_item->length, total_size);
+ return -EINVAL;
+ }
+
+ if (!access_ok(user_query_config_ptr, total_size))
+ return -EFAULT;
+
+ if (__get_user(flags, &user_query_config_ptr->flags))
+ return -EFAULT;
+
+ if (flags != 0)
+ return -EINVAL;
+
+ if (use_uuid) {
+ struct i915_oa_config *tmp;
+ int id;
+
+ BUILD_BUG_ON(sizeof(user_query_config_ptr->uuid) >= sizeof(uuid));
+
+ memset(&uuid, 0, sizeof(uuid));
+ if (__copy_from_user(uuid, user_query_config_ptr->uuid,
+ sizeof(user_query_config_ptr->uuid)))
+ return -EFAULT;
+
+ oa_config = NULL;
+ rcu_read_lock();
+ idr_for_each_entry(&perf->metrics_idr, tmp, id) {
+ if (!strcmp(tmp->uuid, uuid)) {
+ oa_config = i915_oa_config_get(tmp);
+ break;
+ }
+ }
+ rcu_read_unlock();
+ } else {
+ if (__get_user(config_id, &user_query_config_ptr->config))
+ return -EFAULT;
+
+ oa_config = i915_perf_get_oa_config(perf, config_id);
+ }
+ if (!oa_config)
+ return -ENOENT;
+
+ if (__copy_from_user(&user_config, user_config_ptr,
+ sizeof(user_config))) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ ret = can_copy_perf_config_registers_or_number(user_config.n_boolean_regs,
+ user_config.boolean_regs_ptr,
+ oa_config->b_counter_regs_len);
+ if (ret)
+ goto out;
+
+ ret = can_copy_perf_config_registers_or_number(user_config.n_flex_regs,
+ user_config.flex_regs_ptr,
+ oa_config->flex_regs_len);
+ if (ret)
+ goto out;
+
+ ret = can_copy_perf_config_registers_or_number(user_config.n_mux_regs,
+ user_config.mux_regs_ptr,
+ oa_config->mux_regs_len);
+ if (ret)
+ goto out;
+
+ ret = copy_perf_config_registers_or_number(oa_config->b_counter_regs,
+ oa_config->b_counter_regs_len,
+ user_config.boolean_regs_ptr,
+ &user_config.n_boolean_regs);
+ if (ret)
+ goto out;
+
+ ret = copy_perf_config_registers_or_number(oa_config->flex_regs,
+ oa_config->flex_regs_len,
+ user_config.flex_regs_ptr,
+ &user_config.n_flex_regs);
+ if (ret)
+ goto out;
+
+ ret = copy_perf_config_registers_or_number(oa_config->mux_regs,
+ oa_config->mux_regs_len,
+ user_config.mux_regs_ptr,
+ &user_config.n_mux_regs);
+ if (ret)
+ goto out;
+
+ memcpy(user_config.uuid, oa_config->uuid, sizeof(user_config.uuid));
+
+ if (__copy_to_user(user_config_ptr, &user_config,
+ sizeof(user_config))) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ ret = total_size;
+
+out:
+ i915_oa_config_put(oa_config);
+ return ret;
+}
+
+static size_t sizeof_perf_config_list(size_t count)
+{
+ return sizeof(struct drm_i915_query_perf_config) + sizeof(u64) * count;
+}
+
+static size_t sizeof_perf_metrics(struct i915_perf *perf)
+{
+ struct i915_oa_config *tmp;
+ size_t i;
+ int id;
+
+ i = 1;
+ rcu_read_lock();
+ idr_for_each_entry(&perf->metrics_idr, tmp, id)
+ i++;
+ rcu_read_unlock();
+
+ return sizeof_perf_config_list(i);
+}
+
+static int query_perf_config_list(struct drm_i915_private *i915,
+ struct drm_i915_query_item *query_item)
+{
+ struct drm_i915_query_perf_config __user *user_query_config_ptr =
+ u64_to_user_ptr(query_item->data_ptr);
+ struct i915_perf *perf = &i915->perf;
+ u64 *oa_config_ids = NULL;
+ int alloc, n_configs;
+ u32 flags;
+ int ret;
+
+ if (!perf->i915)
+ return -ENODEV;
+
+ if (query_item->length == 0)
+ return sizeof_perf_metrics(perf);
+
+ if (get_user(flags, &user_query_config_ptr->flags))
+ return -EFAULT;
+
+ if (flags != 0)
+ return -EINVAL;
+
+ n_configs = 1;
+ do {
+ struct i915_oa_config *tmp;
+ u64 *ids;
+ int id;
+
+ ids = krealloc(oa_config_ids,
+ n_configs * sizeof(*oa_config_ids),
+ GFP_KERNEL);
+ if (!ids)
+ return -ENOMEM;
+
+ alloc = fetch_and_zero(&n_configs);
+
+ ids[n_configs++] = 1ull; /* reserved for test_config */
+ rcu_read_lock();
+ idr_for_each_entry(&perf->metrics_idr, tmp, id) {
+ if (n_configs < alloc)
+ ids[n_configs] = id;
+ n_configs++;
+ }
+ rcu_read_unlock();
+
+ oa_config_ids = ids;
+ } while (n_configs > alloc);
+
+ if (query_item->length < sizeof_perf_config_list(n_configs)) {
+ DRM_DEBUG("Invalid query config list item size=%u expected=%zu\n",
+ query_item->length,
+ sizeof_perf_config_list(n_configs));
+ kfree(oa_config_ids);
+ return -EINVAL;
+ }
+
+ if (put_user(n_configs, &user_query_config_ptr->config)) {
+ kfree(oa_config_ids);
+ return -EFAULT;
+ }
+
+ ret = copy_to_user(user_query_config_ptr + 1,
+ oa_config_ids,
+ n_configs * sizeof(*oa_config_ids));
+ kfree(oa_config_ids);
+ if (ret)
+ return -EFAULT;
+
+ return sizeof_perf_config_list(n_configs);
+}
+
+static int query_perf_config(struct drm_i915_private *i915,
+ struct drm_i915_query_item *query_item)
+{
+ switch (query_item->flags) {
+ case DRM_I915_QUERY_PERF_CONFIG_LIST:
+ return query_perf_config_list(i915, query_item);
+ case DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID:
+ return query_perf_config_data(i915, query_item, true);
+ case DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID:
+ return query_perf_config_data(i915, query_item, false);
+ default:
+ return -EINVAL;
+ }
+}
+
static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv,
struct drm_i915_query_item *query_item) = {
query_topology_info,
query_engine_info,
+ query_perf_config,
};
int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
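
Taken together with the perf changes earlier in the series, the two query flavours above give userspace a two-pass pattern: call once with length == 0 to learn the required size, then again with a buffer. The sketch below uses the struct and flag names from this hunk; the top-level DRM_I915_QUERY_PERF_CONFIG id and the drm_i915_query wrapper come from the matching uapi header change, which is not shown here, so treat them as assumptions.

#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Sketch only, not part of the patch. */
static int perf_config_list_size(int drm_fd)
{
        struct drm_i915_query_item item = {
                .query_id = DRM_I915_QUERY_PERF_CONFIG, /* assumption */
                .flags = DRM_I915_QUERY_PERF_CONFIG_LIST,
        };
        struct drm_i915_query query = {
                .num_items = 1,
                .items_ptr = (uintptr_t)&item,
        };

        /* First pass: length == 0 asks the kernel for the needed size. */
        if (ioctl(drm_fd, DRM_IOCTL_I915_QUERY, &query))
                return -1;

        /* item.length is now sizeof_perf_config_list(n_configs); allocate
         * that much, set item.data_ptr and issue the ioctl again to get
         * the array of config ids (id 1 is reserved for the test config).
         */
        return item.length;
}
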
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 6d67bd238cfe..855db888516c 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -545,7 +545,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
#define MI_PREDICATE_SRC0_UDW _MMIO(0x2400 + 4)
#define MI_PREDICATE_SRC1 _MMIO(0x2408)
#define MI_PREDICATE_SRC1_UDW _MMIO(0x2408 + 4)
-
+#define MI_PREDICATE_DATA _MMIO(0x2410)
+#define MI_PREDICATE_RESULT _MMIO(0x2418)
+#define MI_PREDICATE_RESULT_1 _MMIO(0x241c)
#define MI_PREDICATE_RESULT_2 _MMIO(0x2214)
#define LOWER_SLICE_ENABLED (1 << 0)
#define LOWER_SLICE_DISABLED (0 << 0)
@@ -2883,6 +2885,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
#define GEN6_RC_SLEEP_PSMI_CONTROL _MMIO(0x2050)
#define GEN6_PSMI_SLEEP_MSG_DISABLE (1 << 0)
+#define GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE REG_BIT(7)
#define GEN8_RC_SEMA_IDLE_MSG_DISABLE (1 << 12)
#define GEN8_FF_DOP_CLOCK_GATE_DISABLE (1 << 10)
@@ -4048,10 +4051,15 @@ enum {
#define SARBUNIT_CLKGATE_DIS (1 << 5)
#define RCCUNIT_CLKGATE_DIS (1 << 7)
#define MSCUNIT_CLKGATE_DIS (1 << 10)
+#define L3_CLKGATE_DIS REG_BIT(16)
+#define L3_CR2X_CLKGATE_DIS REG_BIT(17)
#define SUBSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9524)
#define GWUNIT_CLKGATE_DIS (1 << 16)
+#define SUBSLICE_UNIT_LEVEL_CLKGATE2 _MMIO(0x9528)
+#define CPSSUNIT_CLKGATE_DIS REG_BIT(9)
+
#define UNSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9434)
#define VFUNIT_CLKGATE_DIS (1 << 20)
@@ -4145,6 +4153,7 @@ enum {
#define _VTOTAL_A 0x6000c
#define _VBLANK_A 0x60010
#define _VSYNC_A 0x60014
+#define _EXITLINE_A 0x60018
#define _PIPEASRC 0x6001c
#define _BCLRPAT_A 0x60020
#define _VSYNCSHIFT_A 0x60028
@@ -4196,6 +4205,11 @@ enum {
#define PIPESRC(trans) _MMIO_TRANS2(trans, _PIPEASRC)
#define PIPE_MULT(trans) _MMIO_TRANS2(trans, _PIPE_MULT_A)
+#define EXITLINE(trans) _MMIO_TRANS2(trans, _EXITLINE_A)
+#define EXITLINE_ENABLE REG_BIT(31)
+#define EXITLINE_MASK REG_GENMASK(12, 0)
+#define EXITLINE_SHIFT 0
+
/*
* HSW+ eDP PSR registers
*
@@ -4684,6 +4698,7 @@ enum {
* (Haswell and newer) to see which VIDEO_DIP_DATA byte corresponds to each byte
* of the infoframe structure specified by CEA-861. */
#define VIDEO_DIP_DATA_SIZE 32
+#define VIDEO_DIP_GMP_DATA_SIZE 36
#define VIDEO_DIP_VSC_DATA_SIZE 36
#define VIDEO_DIP_PPS_DATA_SIZE 132
#define VIDEO_DIP_CTL _MMIO(0x61170)
@@ -7262,6 +7277,8 @@ enum {
#define TGL_DMC_DEBUG_DC5_COUNT _MMIO(0x101084)
#define TGL_DMC_DEBUG_DC6_COUNT _MMIO(0x101088)
+#define DMC_DEBUG3 _MMIO(0x101090)
+
/* interrupts */
#define DE_MASTER_IRQ_CONTROL (1 << 31)
#define DE_SPRITEB_FLIP_DONE (1 << 29)
@@ -7660,7 +7677,10 @@ enum {
#define GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE (1 << 10)
#define GEN9_CS_DEBUG_MODE1 _MMIO(0x20ec)
+#define FF_DOP_CLOCK_GATE_DISABLE REG_BIT(1)
#define GEN9_CTX_PREEMPT_REG _MMIO(0x2248)
+#define GEN12_DISABLE_POSH_BUSY_FF_DOP_CG REG_BIT(11)
+
#define GEN8_CS_CHICKEN1 _MMIO(0x2580)
#define GEN9_PREEMPT_3D_OBJECT_LEVEL (1 << 0)
#define GEN9_PREEMPT_GPGPU_LEVEL(hi, lo) (((hi) << 2) | ((lo) << 1))
@@ -8870,6 +8890,7 @@ enum {
#define GEN9_SAGV_DISABLE 0x0
#define GEN9_SAGV_IS_DISABLED 0x1
#define GEN9_SAGV_ENABLE 0x3
+#define GEN12_PCODE_READ_SAGV_BLOCK_TIME_US 0x23
#define GEN6_PCODE_DATA _MMIO(0x138128)
#define GEN6_PCODE_FREQ_IA_RATIO_SHIFT 8
#define GEN6_PCODE_FREQ_RING_RATIO_SHIFT 16
@@ -9747,18 +9768,7 @@ enum skl_power_gate {
#define _TRANSC_MSA_MISC 0x62410
#define _TRANS_EDP_MSA_MISC 0x6f410
#define TRANS_MSA_MISC(tran) _MMIO_TRANS2(tran, _TRANSA_MSA_MISC)
-
-#define TRANS_MSA_SYNC_CLK (1 << 0)
-#define TRANS_MSA_SAMPLING_444 (2 << 1)
-#define TRANS_MSA_CLRSP_YCBCR (1 << 3)
-#define TRANS_MSA_YCBCR_BT709 (1 << 4)
-#define TRANS_MSA_6_BPC (0 << 5)
-#define TRANS_MSA_8_BPC (1 << 5)
-#define TRANS_MSA_10_BPC (2 << 5)
-#define TRANS_MSA_12_BPC (3 << 5)
-#define TRANS_MSA_16_BPC (4 << 5)
-#define TRANS_MSA_CEA_RANGE (1 << 3)
-#define TRANS_MSA_USE_VSC_SDP (1 << 14)
+/* See DP_MSA_MISC_* for the bit definitions */
/* LCPLL Control */
#define LCPLL_CTL _MMIO(0x130040)
@@ -10291,6 +10301,8 @@ enum skl_power_gate {
/* GEN9 DC */
#define DC_STATE_EN _MMIO(0x45504)
#define DC_STATE_DISABLE 0
+#define DC_STATE_EN_DC3CO REG_BIT(30)
+#define DC_STATE_DC3CO_STATUS REG_BIT(29)
#define DC_STATE_EN_UPTO_DC5 (1 << 0)
#define DC_STATE_EN_DC9 (1 << 3)
#define DC_STATE_EN_UPTO_DC6 (2 << 0)
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 437f9fc6282e..4575f368455d 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -206,14 +206,14 @@ static void remove_from_engine(struct i915_request *rq)
* check that the rq still belongs to the newly locked engine.
*/
locked = READ_ONCE(rq->engine);
- spin_lock(&locked->active.lock);
+ spin_lock_irq(&locked->active.lock);
while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) {
spin_unlock(&locked->active.lock);
spin_lock(&engine->active.lock);
locked = engine;
}
list_del(&rq->sched.link);
- spin_unlock(&locked->active.lock);
+ spin_unlock_irq(&locked->active.lock);
}
bool i915_request_retire(struct i915_request *rq)
@@ -242,8 +242,6 @@ bool i915_request_retire(struct i915_request *rq)
&i915_request_timeline(rq)->requests));
rq->ring->head = rq->postfix;
- local_irq_disable();
-
/*
* We only loosely track inflight requests across preemption,
* and so we may find ourselves attempting to retire a _completed_
@@ -252,7 +250,7 @@ bool i915_request_retire(struct i915_request *rq)
*/
remove_from_engine(rq);
- spin_lock(&rq->lock);
+ spin_lock_irq(&rq->lock);
i915_request_mark_complete(rq);
if (!i915_request_signaled(rq))
dma_fence_signal_locked(&rq->fence);
@@ -267,9 +265,7 @@ bool i915_request_retire(struct i915_request *rq)
__notify_execute_cb(rq);
}
GEM_BUG_ON(!list_empty(&rq->execute_cb));
- spin_unlock(&rq->lock);
-
- local_irq_enable();
+ spin_unlock_irq(&rq->lock);
remove_from_client(rq);
list_del(&rq->link);
@@ -649,6 +645,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
rq->gem_context = ce->gem_context;
rq->engine = ce->engine;
rq->ring = ce->ring;
+ rq->execution_mask = ce->engine->mask;
rcu_assign_pointer(rq->timeline, tl);
rq->hwsp_seqno = tl->hwsp_seqno;
@@ -671,7 +668,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
rq->batch = NULL;
rq->capture_list = NULL;
rq->flags = 0;
- rq->execution_mask = ALL_ENGINES;
INIT_LIST_HEAD(&rq->execute_cb);
@@ -1496,6 +1492,7 @@ long i915_request_wait(struct i915_request *rq,
break;
}
+ intel_engine_flush_submission(rq->engine);
timeout = io_schedule_timeout(timeout);
}
__set_current_state(TASK_RUNNING);
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 6a95242b280d..96991d64759c 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -216,8 +216,9 @@ struct i915_request {
unsigned long emitted_jiffies;
unsigned long flags;
-#define I915_REQUEST_WAITBOOST BIT(0)
-#define I915_REQUEST_NOPREEMPT BIT(1)
+#define I915_REQUEST_WAITBOOST BIT(0)
+#define I915_REQUEST_NOPREEMPT BIT(1)
+#define I915_REQUEST_SENTINEL BIT(2)
/** timeline->request entry for this request */
struct list_head link;
@@ -440,6 +441,11 @@ static inline bool i915_request_has_nopreempt(const struct i915_request *rq)
return unlikely(rq->flags & I915_REQUEST_NOPREEMPT);
}
+static inline bool i915_request_has_sentinel(const struct i915_request *rq)
+{
+ return unlikely(rq->flags & I915_REQUEST_SENTINEL);
+}
+
static inline struct intel_timeline *
i915_request_timeline(struct i915_request *rq)
{
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 7b84ebca2901..0ca40f6bf08c 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -177,9 +177,22 @@ static inline int rq_prio(const struct i915_request *rq)
return rq->sched.attr.priority | __NO_PREEMPTION;
}
+static inline bool need_preempt(int prio, int active)
+{
+ /*
+ * Allow preemption of low -> normal -> high, but we do
+ * not allow low priority tasks to preempt other low priority
+ * tasks under the impression that latency for low priority
+ * tasks does not matter (as much as background throughput),
+ * so kiss.
+ */
+ return prio >= max(I915_PRIORITY_NORMAL, active);
+}
+
static void kick_submission(struct intel_engine_cs *engine, int prio)
{
- const struct i915_request *inflight = *engine->execlists.active;
+ const struct i915_request *inflight =
+ execlists_active(&engine->execlists);
/*
* If we are already the currently executing context, don't
@@ -188,7 +201,7 @@ static void kick_submission(struct intel_engine_cs *engine, int prio)
* tasklet, i.e. we have not change the priority queue
* sufficiently to oust the running context.
*/
- if (!inflight || !i915_scheduler_need_preempt(prio, rq_prio(inflight)))
+ if (!inflight || !need_preempt(prio, rq_prio(inflight)))
return;
tasklet_hi_schedule(&engine->execlists.tasklet);
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 7eefccff39bf..07d243acf553 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -52,22 +52,4 @@ static inline void i915_priolist_free(struct i915_priolist *p)
__i915_priolist_free(p);
}
-static inline bool i915_scheduler_need_preempt(int prio, int active)
-{
- /*
- * Allow preemption of low -> normal -> high, but we do
- * not allow low priority tasks to preempt other low priority
- * tasks under the impression that latency for low priority
- * tasks does not matter (as much as background throughput),
- * so kiss.
- *
- * More naturally we would write
- * prio >= max(0, last);
- * except that we wish to prevent triggering preemption at the same
- * priority level: the task that is running should remain running
- * to preserve FIFO ordering of dependencies.
- */
- return prio > max(I915_PRIORITY_NORMAL - 1, active);
-}
-
#endif /* _I915_SCHEDULER_H_ */
diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
index aad81acba9dc..d18e70550054 100644
--- a/drivers/gpu/drm/i915/i915_scheduler_types.h
+++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
@@ -49,6 +49,15 @@ struct i915_sched_attr {
* DAG of each request, we are able to insert it into a sorted queue when it
* is ready, and are able to reorder its portion of the graph to accommodate
* dynamic priority changes.
+ *
+ * Ok, there is now one active element to the "scheduler" in the backends.
+ * We let a new context run for a small amount of time before re-evaluating
+ * the run order. As we re-evaluate, we maintain the strict ordering of
+ * dependencies, but attempt to rotate the active contexts (the current context
+ * is put to the back of its priority queue, and its dependents are reshuffled).
+ * This provides minimal timeslicing and prevents a userspace hog (e.g.
+ * something waiting on a user semaphore [VkEvent]) from denying service to
+ * others.
*/
struct i915_sched_node {
struct list_head signalers_list; /* those before us, we depend upon */
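
The timeslicing note above and the open-coded need_preempt() in i915_scheduler.c are two halves of the same change: a request of equal, at-or-above-normal priority now also kicks the tasklet, which is what allows equal-priority contexts to be rotated. A standalone sketch of the resulting decisions, assuming I915_PRIORITY_NORMAL is the default priority of 0:

#include <stdbool.h>

/* Mirrors need_preempt(prio, active), for illustration only. */
static bool need_preempt_example(int prio, int active)
{
        return prio >= (active > 0 ? active : 0);
}

/*
 * need_preempt_example(-1, -1) == false  low never preempts low
 * need_preempt_example( 0, -1) == true   normal preempts low
 * need_preempt_example( 0,  0) == true   equal priority kicks the
 *                                        tasklet, enabling rotation
 * need_preempt_example( 0,  1) == false  never oust a higher priority
 */
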
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 9fdcd4e2c799..e90c4d0af8fd 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -703,7 +703,6 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
list_add_tail(&vma->vm_link, &vma->vm->bound_list);
if (vma->obj) {
- atomic_inc(&vma->obj->mm.pages_pin_count);
atomic_inc(&vma->obj->bind_count);
assert_bind_count(vma->obj);
}
@@ -726,14 +725,12 @@ i915_vma_remove(struct i915_vma *vma)
if (vma->obj) {
struct drm_i915_gem_object *obj = vma->obj;
- atomic_dec(&obj->bind_count);
-
/*
* And finally now the object is completely decoupled from this
* vma, we can drop its hold on the backing storage and allow
* it to be reaped by the shrinker.
*/
- i915_gem_object_unpin_pages(obj);
+ atomic_dec(&obj->bind_count);
assert_bind_count(obj);
}
@@ -802,8 +799,11 @@ static int vma_get_pages(struct i915_vma *vma)
}
err = vma->ops->set_pages(vma);
- if (err)
+ if (err) {
+ if (vma->obj)
+ i915_gem_object_unpin_pages(vma->obj);
goto unlock;
+ }
}
atomic_inc(&vma->pages_count);
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 0cdc2465534b..e9940f932d26 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -160,6 +160,8 @@ struct intel_device_info {
unsigned int page_sizes; /* page sizes supported by the HW */
+ u32 memory_regions; /* regions supported by the HW */
+
u32 display_mmio_offset;
u8 pipe_mask;
diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c
new file mode 100644
index 000000000000..72f98a111de1
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_memory_region.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "intel_memory_region.h"
+#include "i915_drv.h"
+
+/* XXX: Hysterical raisins. BIT(inst) needs to just be (inst) at some point. */
+#define REGION_MAP(type, inst) \
+ BIT((type) + INTEL_MEMORY_TYPE_SHIFT) | BIT(inst)
+
+const u32 intel_region_map[] = {
+ [INTEL_REGION_SMEM] = REGION_MAP(INTEL_MEMORY_SYSTEM, 0),
+ [INTEL_REGION_LMEM] = REGION_MAP(INTEL_MEMORY_LOCAL, 0),
+ [INTEL_REGION_STOLEN] = REGION_MAP(INTEL_MEMORY_STOLEN, 0),
+};
+
+static u64
+intel_memory_region_free_pages(struct intel_memory_region *mem,
+ struct list_head *blocks)
+{
+ struct i915_buddy_block *block, *on;
+ u64 size = 0;
+
+ list_for_each_entry_safe(block, on, blocks, link) {
+ size += i915_buddy_block_size(&mem->mm, block);
+ i915_buddy_free(&mem->mm, block);
+ }
+ INIT_LIST_HEAD(blocks);
+
+ return size;
+}
+
+void
+__intel_memory_region_put_pages_buddy(struct intel_memory_region *mem,
+ struct list_head *blocks)
+{
+ mutex_lock(&mem->mm_lock);
+ intel_memory_region_free_pages(mem, blocks);
+ mutex_unlock(&mem->mm_lock);
+}
+
+void
+__intel_memory_region_put_block_buddy(struct i915_buddy_block *block)
+{
+ struct list_head blocks;
+
+ INIT_LIST_HEAD(&blocks);
+ list_add(&block->link, &blocks);
+ __intel_memory_region_put_pages_buddy(block->private, &blocks);
+}
+
+int
+__intel_memory_region_get_pages_buddy(struct intel_memory_region *mem,
+ resource_size_t size,
+ unsigned int flags,
+ struct list_head *blocks)
+{
+ unsigned int min_order = 0;
+ unsigned long n_pages;
+
+ GEM_BUG_ON(!IS_ALIGNED(size, mem->mm.chunk_size));
+ GEM_BUG_ON(!list_empty(blocks));
+
+ if (flags & I915_ALLOC_MIN_PAGE_SIZE) {
+ min_order = ilog2(mem->min_page_size) -
+ ilog2(mem->mm.chunk_size);
+ }
+
+ if (flags & I915_ALLOC_CONTIGUOUS) {
+ size = roundup_pow_of_two(size);
+ min_order = ilog2(size) - ilog2(mem->mm.chunk_size);
+ }
+
+ n_pages = size >> ilog2(mem->mm.chunk_size);
+
+ mutex_lock(&mem->mm_lock);
+
+ do {
+ struct i915_buddy_block *block;
+ unsigned int order;
+
+ order = fls(n_pages) - 1;
+ GEM_BUG_ON(order > mem->mm.max_order);
+ GEM_BUG_ON(order < min_order);
+
+ do {
+ block = i915_buddy_alloc(&mem->mm, order);
+ if (!IS_ERR(block))
+ break;
+
+ if (order-- == min_order)
+ goto err_free_blocks;
+ } while (1);
+
+ n_pages -= BIT(order);
+
+ block->private = mem;
+ list_add(&block->link, blocks);
+
+ if (!n_pages)
+ break;
+ } while (1);
+
+ mutex_unlock(&mem->mm_lock);
+ return 0;
+
+err_free_blocks:
+ intel_memory_region_free_pages(mem, blocks);
+ mutex_unlock(&mem->mm_lock);
+ return -ENXIO;
+}
+
+struct i915_buddy_block *
+__intel_memory_region_get_block_buddy(struct intel_memory_region *mem,
+ resource_size_t size,
+ unsigned int flags)
+{
+ struct i915_buddy_block *block;
+ LIST_HEAD(blocks);
+ int ret;
+
+ ret = __intel_memory_region_get_pages_buddy(mem, size, flags, &blocks);
+ if (ret)
+ return ERR_PTR(ret);
+
+ block = list_first_entry(&blocks, typeof(*block), link);
+ list_del_init(&block->link);
+ return block;
+}
+
+int intel_memory_region_init_buddy(struct intel_memory_region *mem)
+{
+ return i915_buddy_init(&mem->mm, resource_size(&mem->region),
+ PAGE_SIZE);
+}
+
+void intel_memory_region_release_buddy(struct intel_memory_region *mem)
+{
+ i915_buddy_fini(&mem->mm);
+}
+
+struct intel_memory_region *
+intel_memory_region_create(struct drm_i915_private *i915,
+ resource_size_t start,
+ resource_size_t size,
+ resource_size_t min_page_size,
+ resource_size_t io_start,
+ const struct intel_memory_region_ops *ops)
+{
+ struct intel_memory_region *mem;
+ int err;
+
+ mem = kzalloc(sizeof(*mem), GFP_KERNEL);
+ if (!mem)
+ return ERR_PTR(-ENOMEM);
+
+ mem->i915 = i915;
+ mem->region = (struct resource)DEFINE_RES_MEM(start, size);
+ mem->io_start = io_start;
+ mem->min_page_size = min_page_size;
+ mem->ops = ops;
+
+ mutex_init(&mem->objects.lock);
+ INIT_LIST_HEAD(&mem->objects.list);
+ INIT_LIST_HEAD(&mem->objects.purgeable);
+
+ mutex_init(&mem->mm_lock);
+
+ if (ops->init) {
+ err = ops->init(mem);
+ if (err)
+ goto err_free;
+ }
+
+ kref_init(&mem->kref);
+ return mem;
+
+err_free:
+ kfree(mem);
+ return ERR_PTR(err);
+}
+
+static void __intel_memory_region_destroy(struct kref *kref)
+{
+ struct intel_memory_region *mem =
+ container_of(kref, typeof(*mem), kref);
+
+ if (mem->ops->release)
+ mem->ops->release(mem);
+
+ mutex_destroy(&mem->mm_lock);
+ mutex_destroy(&mem->objects.lock);
+ kfree(mem);
+}
+
+struct intel_memory_region *
+intel_memory_region_get(struct intel_memory_region *mem)
+{
+ kref_get(&mem->kref);
+ return mem;
+}
+
+void intel_memory_region_put(struct intel_memory_region *mem)
+{
+ kref_put(&mem->kref, __intel_memory_region_destroy);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/intel_memory_region.c"
+#include "selftests/mock_region.c"
+#endif
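
A compact way to read the new region code above is end to end: create a region whose init/release hooks are just the exported buddy helpers, then allocate and free one block. The sketch below is illustrative only and is not taken from the series; SZ_16M/SZ_64K are arbitrary example sizes.

/* Illustrative sketch, not part of the patch. */
static const struct intel_memory_region_ops example_region_ops = {
        .init = intel_memory_region_init_buddy,
        .release = intel_memory_region_release_buddy,
};

static int example_region_alloc(struct drm_i915_private *i915)
{
        struct intel_memory_region *mem;
        struct i915_buddy_block *block;

        mem = intel_memory_region_create(i915,
                                         0,             /* start */
                                         SZ_16M,        /* size */
                                         PAGE_SIZE,     /* min_page_size */
                                         0,             /* io_start */
                                         &example_region_ops);
        if (IS_ERR(mem))
                return PTR_ERR(mem);

        block = __intel_memory_region_get_block_buddy(mem, SZ_64K,
                                                      I915_ALLOC_CONTIGUOUS);
        if (IS_ERR(block)) {
                intel_memory_region_put(mem);
                return PTR_ERR(block);
        }

        __intel_memory_region_put_block_buddy(block);
        intel_memory_region_put(mem);
        return 0;
}
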
diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h
new file mode 100644
index 000000000000..49b059a2be70
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_memory_region.h
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_MEMORY_REGION_H__
+#define __INTEL_MEMORY_REGION_H__
+
+#include <linux/kref.h>
+#include <linux/ioport.h>
+#include <linux/mutex.h>
+#include <linux/io-mapping.h>
+
+#include "i915_buddy.h"
+
+struct drm_i915_private;
+struct drm_i915_gem_object;
+struct intel_memory_region;
+struct sg_table;
+
+/**
+ * Base memory type
+ */
+enum intel_memory_type {
+ INTEL_MEMORY_SYSTEM = 0,
+ INTEL_MEMORY_LOCAL,
+ INTEL_MEMORY_STOLEN,
+};
+
+enum intel_region_id {
+ INTEL_REGION_SMEM = 0,
+ INTEL_REGION_LMEM,
+ INTEL_REGION_STOLEN,
+ INTEL_REGION_UNKNOWN, /* Should be last */
+};
+
+#define REGION_SMEM BIT(INTEL_REGION_SMEM)
+#define REGION_LMEM BIT(INTEL_REGION_LMEM)
+#define REGION_STOLEN BIT(INTEL_REGION_STOLEN)
+
+#define INTEL_MEMORY_TYPE_SHIFT 16
+
+#define MEMORY_TYPE_FROM_REGION(r) (ilog2((r) >> INTEL_MEMORY_TYPE_SHIFT))
+#define MEMORY_INSTANCE_FROM_REGION(r) (ilog2((r) & 0xffff))
+
+#define I915_ALLOC_MIN_PAGE_SIZE BIT(0)
+#define I915_ALLOC_CONTIGUOUS BIT(1)
+
+/**
+ * Memory regions encoded as type | instance
+ */
+extern const u32 intel_region_map[];
+
+struct intel_memory_region_ops {
+ unsigned int flags;
+
+ int (*init)(struct intel_memory_region *mem);
+ void (*release)(struct intel_memory_region *mem);
+
+ struct drm_i915_gem_object *
+ (*create_object)(struct intel_memory_region *mem,
+ resource_size_t size,
+ unsigned int flags);
+};
+
+struct intel_memory_region {
+ struct drm_i915_private *i915;
+
+ const struct intel_memory_region_ops *ops;
+
+ struct io_mapping iomap;
+ struct resource region;
+
+ struct i915_buddy_mm mm;
+ struct mutex mm_lock;
+
+ struct kref kref;
+
+ resource_size_t io_start;
+ resource_size_t min_page_size;
+
+ unsigned int type;
+ unsigned int instance;
+ unsigned int id;
+
+ struct {
+ struct mutex lock; /* Protects access to objects */
+ struct list_head list;
+ struct list_head purgeable;
+ } objects;
+};
+
+int intel_memory_region_init_buddy(struct intel_memory_region *mem);
+void intel_memory_region_release_buddy(struct intel_memory_region *mem);
+
+int __intel_memory_region_get_pages_buddy(struct intel_memory_region *mem,
+ resource_size_t size,
+ unsigned int flags,
+ struct list_head *blocks);
+struct i915_buddy_block *
+__intel_memory_region_get_block_buddy(struct intel_memory_region *mem,
+ resource_size_t size,
+ unsigned int flags);
+void __intel_memory_region_put_pages_buddy(struct intel_memory_region *mem,
+ struct list_head *blocks);
+void __intel_memory_region_put_block_buddy(struct i915_buddy_block *block);
+
+struct intel_memory_region *
+intel_memory_region_create(struct drm_i915_private *i915,
+ resource_size_t start,
+ resource_size_t size,
+ resource_size_t min_page_size,
+ resource_size_t io_start,
+ const struct intel_memory_region_ops *ops);
+
+struct intel_memory_region *
+intel_memory_region_get(struct intel_memory_region *mem);
+void intel_memory_region_put(struct intel_memory_region *mem);
+
+#endif
diff --git a/drivers/gpu/drm/i915/intel_pch.c b/drivers/gpu/drm/i915/intel_pch.c
index 15f8bff141f9..1035d3d46fd8 100644
--- a/drivers/gpu/drm/i915/intel_pch.c
+++ b/drivers/gpu/drm/i915/intel_pch.c
@@ -79,7 +79,6 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id)
WARN_ON(!IS_ICELAKE(dev_priv));
return PCH_ICP;
case INTEL_PCH_MCC_DEVICE_ID_TYPE:
- case INTEL_PCH_MCC2_DEVICE_ID_TYPE:
DRM_DEBUG_KMS("Found Mule Creek Canyon PCH\n");
WARN_ON(!IS_ELKHARTLAKE(dev_priv));
return PCH_MCC;
@@ -87,6 +86,11 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id)
DRM_DEBUG_KMS("Found Tiger Lake LP PCH\n");
WARN_ON(!IS_TIGERLAKE(dev_priv));
return PCH_TGP;
+ case INTEL_PCH_JSP_DEVICE_ID_TYPE:
+ case INTEL_PCH_JSP2_DEVICE_ID_TYPE:
+ DRM_DEBUG_KMS("Found Jasper Lake PCH\n");
+ WARN_ON(!IS_ELKHARTLAKE(dev_priv));
+ return PCH_JSP;
default:
return PCH_NONE;
}
diff --git a/drivers/gpu/drm/i915/intel_pch.h b/drivers/gpu/drm/i915/intel_pch.h
index c29c81ec7971..f4dc18c34291 100644
--- a/drivers/gpu/drm/i915/intel_pch.h
+++ b/drivers/gpu/drm/i915/intel_pch.h
@@ -23,6 +23,7 @@ enum intel_pch {
PCH_SPT, /* Sunrisepoint/Kaby Lake PCH */
PCH_CNP, /* Cannon/Comet Lake PCH */
PCH_ICP, /* Ice Lake PCH */
+ PCH_JSP, /* Jasper Lake PCH */
PCH_MCC, /* Mule Creek Canyon PCH */
PCH_TGP, /* Tiger Lake PCH */
};
@@ -44,14 +45,16 @@ enum intel_pch {
#define INTEL_PCH_CMP2_DEVICE_ID_TYPE 0x0680
#define INTEL_PCH_ICP_DEVICE_ID_TYPE 0x3480
#define INTEL_PCH_MCC_DEVICE_ID_TYPE 0x4B00
-#define INTEL_PCH_MCC2_DEVICE_ID_TYPE 0x3880
#define INTEL_PCH_TGP_DEVICE_ID_TYPE 0xA080
+#define INTEL_PCH_JSP_DEVICE_ID_TYPE 0x4D80
+#define INTEL_PCH_JSP2_DEVICE_ID_TYPE 0x3880
#define INTEL_PCH_P2X_DEVICE_ID_TYPE 0x7100
#define INTEL_PCH_P3X_DEVICE_ID_TYPE 0x7000
#define INTEL_PCH_QEMU_DEVICE_ID_TYPE 0x2900 /* qemu q35 has 2918 */
#define INTEL_PCH_TYPE(dev_priv) ((dev_priv)->pch_type)
#define INTEL_PCH_ID(dev_priv) ((dev_priv)->pch_id)
+#define HAS_PCH_JSP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_JSP)
#define HAS_PCH_MCC(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_MCC)
#define HAS_PCH_TGP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_TGP)
#define HAS_PCH_ICP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_ICP)
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index bfcf03ab5245..362234449087 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -25,7 +25,6 @@
*
*/
-#include <linux/cpufreq.h>
#include <linux/module.h>
#include <linux/pm_runtime.h>
@@ -38,6 +37,8 @@
#include "display/intel_fbc.h"
#include "display/intel_sprite.h"
+#include "gt/intel_llc.h"
+
#include "i915_drv.h"
#include "i915_irq.h"
#include "i915_trace.h"
@@ -1117,10 +1118,7 @@ static u16 g4x_compute_wm(const struct intel_crtc_state *crtc_state,
clock = adjusted_mode->crtc_clock;
htotal = adjusted_mode->crtc_htotal;
- if (plane->id == PLANE_CURSOR)
- width = plane_state->base.crtc_w;
- else
- width = drm_rect_width(&plane_state->base.dst);
+ width = drm_rect_width(&plane_state->base.dst);
if (plane->id == PLANE_CURSOR) {
wm = intel_wm_method2(clock, htotal, width, cpp, latency);
@@ -2549,7 +2547,8 @@ static u32 ilk_compute_cur_wm(const struct intel_crtc_state *crtc_state,
return ilk_wm_method2(crtc_state->pixel_rate,
crtc_state->base.adjusted_mode.crtc_htotal,
- plane_state->base.crtc_w, cpp, mem_value);
+ drm_rect_width(&plane_state->base.dst),
+ cpp, mem_value);
}
/* Only for WM_LP. */
@@ -3089,8 +3088,8 @@ static int ilk_compute_pipe_wm(struct intel_crtc_state *crtc_state)
struct intel_pipe_wm *pipe_wm;
struct drm_device *dev = state->dev;
const struct drm_i915_private *dev_priv = to_i915(dev);
- struct drm_plane *plane;
- const struct drm_plane_state *plane_state;
+ struct intel_plane *plane;
+ const struct intel_plane_state *plane_state;
const struct intel_plane_state *pristate = NULL;
const struct intel_plane_state *sprstate = NULL;
const struct intel_plane_state *curstate = NULL;
@@ -3099,15 +3098,13 @@ static int ilk_compute_pipe_wm(struct intel_crtc_state *crtc_state)
pipe_wm = &crtc_state->wm.ilk.optimal;
- drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, &crtc_state->base) {
- const struct intel_plane_state *ps = to_intel_plane_state(plane_state);
-
- if (plane->type == DRM_PLANE_TYPE_PRIMARY)
- pristate = ps;
- else if (plane->type == DRM_PLANE_TYPE_OVERLAY)
- sprstate = ps;
- else if (plane->type == DRM_PLANE_TYPE_CURSOR)
- curstate = ps;
+ intel_atomic_crtc_state_for_each_plane_state(plane, plane_state, crtc_state) {
+ if (plane->base.type == DRM_PLANE_TYPE_PRIMARY)
+ pristate = plane_state;
+ else if (plane->base.type == DRM_PLANE_TYPE_OVERLAY)
+ sprstate = plane_state;
+ else if (plane->base.type == DRM_PLANE_TYPE_CURSOR)
+ curstate = plane_state;
}
pipe_wm->pipe_enabled = crtc_state->base.active;
@@ -3642,6 +3639,39 @@ intel_has_sagv(struct drm_i915_private *dev_priv)
dev_priv->sagv_status != I915_SAGV_NOT_CONTROLLED;
}
+static void
+skl_setup_sagv_block_time(struct drm_i915_private *dev_priv)
+{
+ if (INTEL_GEN(dev_priv) >= 12) {
+ u32 val = 0;
+ int ret;
+
+ ret = sandybridge_pcode_read(dev_priv,
+ GEN12_PCODE_READ_SAGV_BLOCK_TIME_US,
+ &val, NULL);
+ if (!ret) {
+ dev_priv->sagv_block_time_us = val;
+ return;
+ }
+
+ DRM_DEBUG_DRIVER("Couldn't read SAGV block time!\n");
+ } else if (IS_GEN(dev_priv, 11)) {
+ dev_priv->sagv_block_time_us = 10;
+ return;
+ } else if (IS_GEN(dev_priv, 10)) {
+ dev_priv->sagv_block_time_us = 20;
+ return;
+ } else if (IS_GEN(dev_priv, 9)) {
+ dev_priv->sagv_block_time_us = 30;
+ return;
+ } else {
+ MISSING_CASE(INTEL_GEN(dev_priv));
+ }
+
+ /* Default to an unusable block time */
+ dev_priv->sagv_block_time_us = -1;
+}
+
/*
* SAGV dynamically adjusts the system agent voltage and clock frequencies
* depending on power and performance requirements. The display engine access
@@ -3730,18 +3760,10 @@ bool intel_can_enable_sagv(struct intel_atomic_state *state)
struct intel_crtc_state *crtc_state;
enum pipe pipe;
int level, latency;
- int sagv_block_time_us;
if (!intel_has_sagv(dev_priv))
return false;
- if (IS_GEN(dev_priv, 9))
- sagv_block_time_us = 30;
- else if (IS_GEN(dev_priv, 10))
- sagv_block_time_us = 20;
- else
- sagv_block_time_us = 10;
-
/*
* If there are no active CRTCs, no additional checks need be performed
*/
@@ -3788,7 +3810,7 @@ bool intel_can_enable_sagv(struct intel_atomic_state *state)
* incur memory latencies higher than sagv_block_time_us we
* can't enable SAGV.
*/
- if (latency < sagv_block_time_us)
+ if (latency < dev_priv->sagv_block_time_us)
return false;
}
@@ -4048,7 +4070,6 @@ static uint_fixed_16_16_t
skl_plane_downscale_amount(const struct intel_crtc_state *crtc_state,
const struct intel_plane_state *plane_state)
{
- struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
u32 src_w, src_h, dst_w, dst_h;
uint_fixed_16_16_t fp_w_ratio, fp_h_ratio;
uint_fixed_16_16_t downscale_h, downscale_w;
@@ -4056,27 +4077,17 @@ skl_plane_downscale_amount(const struct intel_crtc_state *crtc_state,
if (WARN_ON(!intel_wm_plane_visible(crtc_state, plane_state)))
return u32_to_fixed16(0);
- /* n.b., src is 16.16 fixed point, dst is whole integer */
- if (plane->id == PLANE_CURSOR) {
- /*
- * Cursors only support 0/180 degree rotation,
- * hence no need to account for rotation here.
- */
- src_w = plane_state->base.src_w >> 16;
- src_h = plane_state->base.src_h >> 16;
- dst_w = plane_state->base.crtc_w;
- dst_h = plane_state->base.crtc_h;
- } else {
- /*
- * Src coordinates are already rotated by 270 degrees for
- * the 90/270 degree plane rotation cases (to match the
- * GTT mapping), hence no need to account for rotation here.
- */
- src_w = drm_rect_width(&plane_state->base.src) >> 16;
- src_h = drm_rect_height(&plane_state->base.src) >> 16;
- dst_w = drm_rect_width(&plane_state->base.dst);
- dst_h = drm_rect_height(&plane_state->base.dst);
- }
+ /*
+ * Src coordinates are already rotated by 270 degrees for
+ * the 90/270 degree plane rotation cases (to match the
+ * GTT mapping), hence no need to account for rotation here.
+ *
+ * n.b., src is 16.16 fixed point, dst is whole integer.
+ */
+ src_w = drm_rect_width(&plane_state->base.src) >> 16;
+ src_h = drm_rect_height(&plane_state->base.src) >> 16;
+ dst_w = drm_rect_width(&plane_state->base.dst);
+ dst_h = drm_rect_height(&plane_state->base.dst);
fp_w_ratio = div_fixed16(src_w, dst_w);
fp_h_ratio = div_fixed16(src_h, dst_h);
@@ -4124,8 +4135,8 @@ int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc,
{
struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev);
struct drm_atomic_state *state = crtc_state->base.state;
- struct drm_plane *plane;
- const struct drm_plane_state *drm_plane_state;
+ const struct intel_plane_state *plane_state;
+ struct intel_plane *plane;
int crtc_clock, dotclk;
u32 pipe_max_pixel_rate;
uint_fixed_16_16_t pipe_downscale;
@@ -4134,12 +4145,10 @@ int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc,
if (!crtc_state->base.enable)
return 0;
- drm_atomic_crtc_state_for_each_plane_state(plane, drm_plane_state, &crtc_state->base) {
+ intel_atomic_crtc_state_for_each_plane_state(plane, plane_state, crtc_state) {
uint_fixed_16_16_t plane_downscale;
uint_fixed_16_16_t fp_9_div_8 = div_fixed16(9, 8);
int bpp;
- const struct intel_plane_state *plane_state =
- to_intel_plane_state(drm_plane_state);
if (!intel_wm_plane_visible(crtc_state, plane_state))
continue;
@@ -4227,18 +4236,16 @@ skl_get_total_relative_data_rate(struct intel_crtc_state *crtc_state,
u64 *uv_plane_data_rate)
{
struct drm_atomic_state *state = crtc_state->base.state;
- struct drm_plane *plane;
- const struct drm_plane_state *drm_plane_state;
+ struct intel_plane *plane;
+ const struct intel_plane_state *plane_state;
u64 total_data_rate = 0;
if (WARN_ON(!state))
return 0;
/* Calculate and cache data rate for each plane */
- drm_atomic_crtc_state_for_each_plane_state(plane, drm_plane_state, &crtc_state->base) {
- enum plane_id plane_id = to_intel_plane(plane)->id;
- const struct intel_plane_state *plane_state =
- to_intel_plane_state(drm_plane_state);
+ intel_atomic_crtc_state_for_each_plane_state(plane, plane_state, crtc_state) {
+ enum plane_id plane_id = plane->id;
u64 rate;
/* packed/y */
@@ -4259,18 +4266,16 @@ static u64
icl_get_total_relative_data_rate(struct intel_crtc_state *crtc_state,
u64 *plane_data_rate)
{
- struct drm_plane *plane;
- const struct drm_plane_state *drm_plane_state;
+ struct intel_plane *plane;
+ const struct intel_plane_state *plane_state;
u64 total_data_rate = 0;
if (WARN_ON(!crtc_state->base.state))
return 0;
/* Calculate and cache data rate for each plane */
- drm_atomic_crtc_state_for_each_plane_state(plane, drm_plane_state, &crtc_state->base) {
- const struct intel_plane_state *plane_state =
- to_intel_plane_state(drm_plane_state);
- enum plane_id plane_id = to_intel_plane(plane)->id;
+ intel_atomic_crtc_state_for_each_plane_state(plane, plane_state, crtc_state) {
+ enum plane_id plane_id = plane->id;
u64 rate;
if (!plane_state->planar_linked_plane) {
@@ -4282,7 +4287,7 @@ icl_get_total_relative_data_rate(struct intel_crtc_state *crtc_state,
/*
* The slave plane might not iterate in
- * drm_atomic_crtc_state_for_each_plane_state(),
+ * intel_atomic_crtc_state_for_each_plane_state(),
* and needs the master plane state which may be
* NULL if we try get_new_plane_state(), so we
* always calculate from the master.
@@ -4706,20 +4711,15 @@ skl_compute_plane_wm_params(const struct intel_crtc_state *crtc_state,
const struct intel_plane_state *plane_state,
struct skl_wm_params *wp, int color_plane)
{
- struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
const struct drm_framebuffer *fb = plane_state->base.fb;
int width;
- if (plane->id == PLANE_CURSOR) {
- width = plane_state->base.crtc_w;
- } else {
- /*
- * Src coordinates are already rotated by 270 degrees for
- * the 90/270 degree plane rotation cases (to match the
- * GTT mapping), hence no need to account for rotation here.
- */
- width = drm_rect_width(&plane_state->base.src) >> 16;
- }
+ /*
+ * Src coordinates are already rotated by 270 degrees for
+ * the 90/270 degree plane rotation cases (to match the
+ * GTT mapping), hence no need to account for rotation here.
+ */
+ width = drm_rect_width(&plane_state->base.src) >> 16;
return skl_compute_wm_params(crtc_state, width,
fb->format, fb->modifier,
@@ -5065,8 +5065,8 @@ static int skl_build_pipe_wm(struct intel_crtc_state *crtc_state)
{
struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
struct skl_pipe_wm *pipe_wm = &crtc_state->wm.skl.optimal;
- struct drm_plane *plane;
- const struct drm_plane_state *drm_plane_state;
+ struct intel_plane *plane;
+ const struct intel_plane_state *plane_state;
int ret;
/*
@@ -5075,10 +5075,8 @@ static int skl_build_pipe_wm(struct intel_crtc_state *crtc_state)
*/
memset(pipe_wm->planes, 0, sizeof(pipe_wm->planes));
- drm_atomic_crtc_state_for_each_plane_state(plane, drm_plane_state,
- &crtc_state->base) {
- const struct intel_plane_state *plane_state =
- to_intel_plane_state(drm_plane_state);
+ intel_atomic_crtc_state_for_each_plane_state(plane, plane_state,
+ crtc_state) {
if (INTEL_GEN(dev_priv) >= 11)
ret = icl_build_plane_wm(crtc_state, plane_state);
@@ -5238,19 +5236,6 @@ bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry *ddb,
return false;
}
-static u32
-pipes_modified(struct intel_atomic_state *state)
-{
- struct intel_crtc *crtc;
- struct intel_crtc_state *crtc_state;
- u32 i, ret = 0;
-
- for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i)
- ret |= drm_crtc_mask(&crtc->base);
-
- return ret;
-}
-
static int
skl_ddb_add_affected_planes(const struct intel_crtc_state *old_crtc_state,
struct intel_crtc_state *new_crtc_state)
@@ -5426,36 +5411,27 @@ skl_print_wm_changes(struct intel_atomic_state *state)
}
}
-static int
-skl_ddb_add_affected_pipes(struct intel_atomic_state *state, bool *changed)
+static int intel_add_all_pipes(struct intel_atomic_state *state)
{
- struct drm_device *dev = state->base.dev;
- const struct drm_i915_private *dev_priv = to_i915(dev);
+ struct drm_i915_private *dev_priv = to_i915(state->base.dev);
struct intel_crtc *crtc;
- struct intel_crtc_state *crtc_state;
- u32 realloc_pipes = pipes_modified(state);
- int ret, i;
- /*
- * When we distrust bios wm we always need to recompute to set the
- * expected DDB allocations for each CRTC.
- */
- if (dev_priv->wm.distrust_bios_wm)
- (*changed) = true;
+ for_each_intel_crtc(&dev_priv->drm, crtc) {
+ struct intel_crtc_state *crtc_state;
- /*
- * If this transaction isn't actually touching any CRTC's, don't
- * bother with watermark calculation. Note that if we pass this
- * test, we're guaranteed to hold at least one CRTC state mutex,
- * which means we can safely use values like dev_priv->active_pipes
- * since any racing commits that want to update them would need to
- * hold _all_ CRTC state mutexes.
- */
- for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i)
- (*changed) = true;
+ crtc_state = intel_atomic_get_crtc_state(&state->base, crtc);
+ if (IS_ERR(crtc_state))
+ return PTR_ERR(crtc_state);
+ }
- if (!*changed)
- return 0;
+ return 0;
+}
+
+static int
+skl_ddb_add_affected_pipes(struct intel_atomic_state *state)
+{
+ struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+ int ret;
/*
* If this is our first atomic update following hardware readout,
@@ -5464,7 +5440,7 @@ skl_ddb_add_affected_pipes(struct intel_atomic_state *state, bool *changed)
* ensure a full DDB recompute.
*/
if (dev_priv->wm.distrust_bios_wm) {
- ret = drm_modeset_lock(&dev->mode_config.connection_mutex,
+ ret = drm_modeset_lock(&dev_priv->drm.mode_config.connection_mutex,
state->base.acquire_ctx);
if (ret)
return ret;
@@ -5495,18 +5471,11 @@ skl_ddb_add_affected_pipes(struct intel_atomic_state *state, bool *changed)
* to grab the lock on *all* CRTC's.
*/
if (state->active_pipe_changes || state->modeset) {
- realloc_pipes = ~0;
state->wm_results.dirty_pipes = ~0;
- }
- /*
- * We're not recomputing for the pipes not included in the commit, so
- * make sure we start with the current state.
- */
- for_each_intel_crtc_mask(dev, crtc, realloc_pipes) {
- crtc_state = intel_atomic_get_crtc_state(&state->base, crtc);
- if (IS_ERR(crtc_state))
- return PTR_ERR(crtc_state);
+ ret = intel_add_all_pipes(state);
+ if (ret)
+ return ret;
}
return 0;
@@ -5579,14 +5548,13 @@ skl_compute_wm(struct intel_atomic_state *state)
struct intel_crtc_state *new_crtc_state;
struct intel_crtc_state *old_crtc_state;
struct skl_ddb_values *results = &state->wm_results;
- bool changed = false;
int ret, i;
/* Clear all dirty flags */
results->dirty_pipes = 0;
- ret = skl_ddb_add_affected_pipes(state, &changed);
- if (ret || !changed)
+ ret = skl_ddb_add_affected_pipes(state);
+ if (ret)
return ret;
/*
@@ -5608,7 +5576,7 @@ skl_compute_wm(struct intel_atomic_state *state)
if (!skl_pipe_wm_equals(crtc,
&old_crtc_state->wm.skl.optimal,
&new_crtc_state->wm.skl.optimal))
- results->dirty_pipes |= drm_crtc_mask(&crtc->base);
+ results->dirty_pipes |= BIT(crtc->pipe);
}
ret = skl_compute_ddb(state);
@@ -5628,7 +5596,7 @@ static void skl_atomic_update_crtc_wm(struct intel_atomic_state *state,
struct skl_pipe_wm *pipe_wm = &crtc_state->wm.skl.optimal;
enum pipe pipe = crtc->pipe;
- if (!(state->wm_results.dirty_pipes & drm_crtc_mask(&crtc->base)))
+ if ((state->wm_results.dirty_pipes & BIT(crtc->pipe)) == 0)
return;
I915_WRITE(PIPE_WM_LINETIME(pipe), pipe_wm->linetime);
@@ -5637,12 +5605,11 @@ static void skl_atomic_update_crtc_wm(struct intel_atomic_state *state,
static void skl_initial_wm(struct intel_atomic_state *state,
struct intel_crtc_state *crtc_state)
{
- struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
- struct drm_device *dev = intel_crtc->base.dev;
- struct drm_i915_private *dev_priv = to_i915(dev);
+ struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+ struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
struct skl_ddb_values *results = &state->wm_results;
- if ((results->dirty_pipes & drm_crtc_mask(&intel_crtc->base)) == 0)
+ if ((results->dirty_pipes & BIT(crtc->pipe)) == 0)
return;
mutex_lock(&dev_priv->wm.wm_mutex);
@@ -5791,7 +5758,7 @@ void skl_wm_get_hw_state(struct drm_i915_private *dev_priv)
skl_pipe_wm_get_hw_state(crtc, &crtc_state->wm.skl.optimal);
if (crtc->active)
- hw->dirty_pipes |= drm_crtc_mask(&crtc->base);
+ hw->dirty_pipes |= BIT(crtc->pipe);
}
if (dev_priv->active_pipes) {
@@ -7064,93 +7031,6 @@ static void gen6_enable_rps(struct drm_i915_private *dev_priv)
intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
}
-static void gen6_update_ring_freq(struct drm_i915_private *dev_priv)
-{
- struct intel_rps *rps = &dev_priv->gt_pm.rps;
- const int min_freq = 15;
- const int scaling_factor = 180;
- unsigned int gpu_freq;
- unsigned int max_ia_freq, min_ring_freq;
- unsigned int max_gpu_freq, min_gpu_freq;
- struct cpufreq_policy *policy;
-
- lockdep_assert_held(&rps->lock);
-
- if (rps->max_freq <= rps->min_freq)
- return;
-
- policy = cpufreq_cpu_get(0);
- if (policy) {
- max_ia_freq = policy->cpuinfo.max_freq;
- cpufreq_cpu_put(policy);
- } else {
- /*
- * Default to measured freq if none found, PCU will ensure we
- * don't go over
- */
- max_ia_freq = tsc_khz;
- }
-
- /* Convert from kHz to MHz */
- max_ia_freq /= 1000;
-
- min_ring_freq = I915_READ(DCLK) & 0xf;
- /* convert DDR frequency from units of 266.6MHz to bandwidth */
- min_ring_freq = mult_frac(min_ring_freq, 8, 3);
-
- min_gpu_freq = rps->min_freq;
- max_gpu_freq = rps->max_freq;
- if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
- /* Convert GT frequency to 50 HZ units */
- min_gpu_freq /= GEN9_FREQ_SCALER;
- max_gpu_freq /= GEN9_FREQ_SCALER;
- }
-
- /*
- * For each potential GPU frequency, load a ring frequency we'd like
- * to use for memory access. We do this by specifying the IA frequency
- * the PCU should use as a reference to determine the ring frequency.
- */
- for (gpu_freq = max_gpu_freq; gpu_freq >= min_gpu_freq; gpu_freq--) {
- const int diff = max_gpu_freq - gpu_freq;
- unsigned int ia_freq = 0, ring_freq = 0;
-
- if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
- /*
- * ring_freq = 2 * GT. ring_freq is in 100MHz units
- * No floor required for ring frequency on SKL.
- */
- ring_freq = gpu_freq;
- } else if (INTEL_GEN(dev_priv) >= 8) {
- /* max(2 * GT, DDR). NB: GT is 50MHz units */
- ring_freq = max(min_ring_freq, gpu_freq);
- } else if (IS_HASWELL(dev_priv)) {
- ring_freq = mult_frac(gpu_freq, 5, 4);
- ring_freq = max(min_ring_freq, ring_freq);
- /* leave ia_freq as the default, chosen by cpufreq */
- } else {
- /* On older processors, there is no separate ring
- * clock domain, so in order to boost the bandwidth
- * of the ring, we need to upclock the CPU (ia_freq).
- *
- * For GPU frequencies less than 750MHz,
- * just use the lowest ring freq.
- */
- if (gpu_freq < min_freq)
- ia_freq = 800;
- else
- ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
- ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
- }
-
- sandybridge_pcode_write(dev_priv,
- GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
- ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT |
- ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT |
- gpu_freq);
- }
-}
-
static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv)
{
u32 val, rp0;
@@ -7999,18 +7879,6 @@ void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv)
gen6_reset_rps_interrupts(dev_priv);
}
-static inline void intel_disable_llc_pstate(struct drm_i915_private *i915)
-{
- lockdep_assert_held(&i915->gt_pm.rps.lock);
-
- if (!i915->gt_pm.llc_pstate.enabled)
- return;
-
- /* Currently there is no HW configuration to be done to disable. */
-
- i915->gt_pm.llc_pstate.enabled = false;
-}
-
static void intel_disable_rps(struct drm_i915_private *dev_priv)
{
lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
@@ -8038,23 +7906,11 @@ void intel_disable_gt_powersave(struct drm_i915_private *dev_priv)
intel_disable_rps(dev_priv);
if (HAS_LLC(dev_priv))
- intel_disable_llc_pstate(dev_priv);
+ intel_llc_disable(&dev_priv->gt.llc);
mutex_unlock(&dev_priv->gt_pm.rps.lock);
}
-static inline void intel_enable_llc_pstate(struct drm_i915_private *i915)
-{
- lockdep_assert_held(&i915->gt_pm.rps.lock);
-
- if (i915->gt_pm.llc_pstate.enabled)
- return;
-
- gen6_update_ring_freq(i915);
-
- i915->gt_pm.llc_pstate.enabled = true;
-}
-
static void intel_enable_rps(struct drm_i915_private *dev_priv)
{
struct intel_rps *rps = &dev_priv->gt_pm.rps;
@@ -8098,8 +7954,8 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv)
if (HAS_RPS(dev_priv))
intel_enable_rps(dev_priv);
- if (HAS_LLC(dev_priv))
- intel_enable_llc_pstate(dev_priv);
+
+ intel_llc_enable(&dev_priv->gt.llc);
mutex_unlock(&dev_priv->gt_pm.rps.lock);
}
@@ -9013,6 +8869,9 @@ void intel_init_pm(struct drm_i915_private *dev_priv)
else if (IS_GEN(dev_priv, 5))
i915_ironlake_get_mem_freq(dev_priv);
+ if (intel_has_sagv(dev_priv))
+ skl_setup_sagv_block_time(dev_priv);
+
/* For FIFO watermark updates */
if (INTEL_GEN(dev_priv) >= 9) {
skl_setup_wm_latency(dev_priv);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
index bfa40a5b6d98..97f89f744ee2 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
@@ -120,7 +120,7 @@ static void pm_resume(struct drm_i915_private *i915)
i915_gem_sanitize(i915);
i915_gem_restore_gtt_mappings(i915);
- i915_gem_restore_fences(i915);
+ i915_gem_restore_fences(&i915->ggtt);
i915_gem_resume(i915);
}
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index 165b3a7f9744..ebe735df6504 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -82,8 +82,6 @@ static int fake_get_pages(struct drm_i915_gem_object *obj)
}
GEM_BUG_ON(rem);
- obj->mm.madv = I915_MADV_DONTNEED;
-
__i915_gem_object_set_pages(obj, pages, sg_page_sizes);
return 0;
@@ -95,7 +93,6 @@ static void fake_put_pages(struct drm_i915_gem_object *obj,
{
fake_free_pages(obj, pages);
obj->mm.dirty = false;
- obj->mm.madv = I915_MADV_WILLNEED;
}
static const struct drm_i915_gem_object_ops fake_ops = {
@@ -122,6 +119,8 @@ fake_dma_object(struct drm_i915_private *i915, u64 size)
drm_gem_private_object_init(&i915->drm, &obj->base, size);
i915_gem_object_init(obj, &fake_ops);
+ i915_gem_object_set_volatile(obj);
+
obj->write_domain = I915_GEM_DOMAIN_CPU;
obj->read_domains = I915_GEM_DOMAIN_CPU;
obj->cache_level = I915_CACHE_NONE;
diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
index 6713efea350b..6daf6599ec79 100644
--- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
@@ -35,3 +35,4 @@ selftest(reset, intel_reset_live_selftests)
selftest(hangcheck, intel_hangcheck_live_selftests)
selftest(execlists, intel_execlists_live_selftests)
selftest(guc, intel_guc_live_selftest)
+selftest(perf, i915_perf_live_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
index b88084fe3269..aa5a0e7f5d9e 100644
--- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
@@ -26,3 +26,4 @@ selftest(gtt, i915_gem_gtt_mock_selftests)
selftest(hugepages, i915_gem_huge_page_mock_selftests)
selftest(contexts, i915_gem_context_mock_selftests)
selftest(buddy, i915_buddy_mock_selftests)
+selftest(memory_region, intel_memory_region_mock_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/i915_perf.c b/drivers/gpu/drm/i915/selftests/i915_perf.c
new file mode 100644
index 000000000000..dc6d689e4251
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/i915_perf.c
@@ -0,0 +1,216 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/kref.h>
+
+#include "gem/i915_gem_pm.h"
+#include "gt/intel_gt.h"
+
+#include "i915_selftest.h"
+
+#include "igt_flush_test.h"
+#include "lib_sw_fence.h"
+
+static struct i915_perf_stream *
+test_stream(struct i915_perf *perf)
+{
+ struct drm_i915_perf_open_param param = {};
+ struct perf_open_properties props = {
+ .engine = intel_engine_lookup_user(perf->i915,
+ I915_ENGINE_CLASS_RENDER,
+ 0),
+ .sample_flags = SAMPLE_OA_REPORT,
+ .oa_format = I915_OA_FORMAT_C4_B8,
+ .metrics_set = 1,
+ };
+ struct i915_perf_stream *stream;
+
+ stream = kzalloc(sizeof(*stream), GFP_KERNEL);
+ if (!stream)
+ return NULL;
+
+ stream->perf = perf;
+
+ mutex_lock(&perf->lock);
+ if (i915_oa_stream_init(stream, &param, &props)) {
+ kfree(stream);
+ stream = NULL;
+ }
+ mutex_unlock(&perf->lock);
+
+ return stream;
+}
+
+static void stream_destroy(struct i915_perf_stream *stream)
+{
+ struct i915_perf *perf = stream->perf;
+
+ mutex_lock(&perf->lock);
+ i915_perf_destroy_locked(stream);
+ mutex_unlock(&perf->lock);
+}
+
+static int live_sanitycheck(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct i915_perf_stream *stream;
+
+ /* Quick check we can create a perf stream */
+
+ stream = test_stream(&i915->perf);
+ if (!stream)
+ return -EINVAL;
+
+ stream_destroy(stream);
+ return 0;
+}
+
+static int write_timestamp(struct i915_request *rq, int slot)
+{
+ u32 *cs;
+ int len;
+
+ cs = intel_ring_begin(rq, 6);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ len = 5;
+ if (INTEL_GEN(rq->i915) >= 8)
+ len++;
+
+ *cs++ = GFX_OP_PIPE_CONTROL(len);
+ *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB |
+ PIPE_CONTROL_STORE_DATA_INDEX |
+ PIPE_CONTROL_WRITE_TIMESTAMP;
+ *cs++ = slot * sizeof(u32);
+ *cs++ = 0;
+ *cs++ = 0;
+ *cs++ = 0;
+
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+static ktime_t poll_status(struct i915_request *rq, int slot)
+{
+ while (!intel_read_status_page(rq->engine, slot) &&
+ !i915_request_completed(rq))
+ cpu_relax();
+
+ return ktime_get();
+}
+
+static int live_noa_delay(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct i915_perf_stream *stream;
+ struct i915_request *rq;
+ ktime_t t0, t1;
+ u64 expected;
+ u32 delay;
+ int err;
+ int i;
+
+ /* Check that the GPU delay matches expectations */
+
+ stream = test_stream(&i915->perf);
+ if (!stream)
+ return -ENOMEM;
+
+ expected = atomic64_read(&stream->perf->noa_programming_delay);
+
+ if (stream->engine->class != RENDER_CLASS) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ for (i = 0; i < 4; i++)
+ intel_write_status_page(stream->engine, 0x100 + i, 0);
+
+ rq = i915_request_create(stream->engine->kernel_context);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out;
+ }
+
+ if (rq->engine->emit_init_breadcrumb &&
+ i915_request_timeline(rq)->has_initial_breadcrumb) {
+ err = rq->engine->emit_init_breadcrumb(rq);
+ if (err) {
+ i915_request_add(rq);
+ goto out;
+ }
+ }
+
+ err = write_timestamp(rq, 0x100);
+ if (err) {
+ i915_request_add(rq);
+ goto out;
+ }
+
+ err = rq->engine->emit_bb_start(rq,
+ i915_ggtt_offset(stream->noa_wait), 0,
+ I915_DISPATCH_SECURE);
+ if (err) {
+ i915_request_add(rq);
+ goto out;
+ }
+
+ err = write_timestamp(rq, 0x102);
+ if (err) {
+ i915_request_add(rq);
+ goto out;
+ }
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ preempt_disable();
+ t0 = poll_status(rq, 0x100);
+ t1 = poll_status(rq, 0x102);
+ preempt_enable();
+
+ pr_info("CPU delay: %lluns, expected %lluns\n",
+ ktime_sub(t1, t0), expected);
+
+ delay = intel_read_status_page(stream->engine, 0x102);
+ delay -= intel_read_status_page(stream->engine, 0x100);
+ delay = div_u64(mul_u32_u32(delay, 1000 * 1000),
+ RUNTIME_INFO(i915)->cs_timestamp_frequency_khz);
+ pr_info("GPU delay: %uns, expected %lluns\n",
+ delay, expected);
+
+ if (4 * delay < 3 * expected || 2 * delay > 3 * expected) {
+ pr_err("GPU delay [%uus] outside of expected threshold! [%lluus, %lluus]\n",
+ delay / 1000,
+ div_u64(3 * expected, 4000),
+ div_u64(3 * expected, 2000));
+ err = -EINVAL;
+ }
+
+ i915_request_put(rq);
+out:
+ stream_destroy(stream);
+ return err;
+}
+
+int i915_perf_live_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(live_sanitycheck),
+ SUBTEST(live_noa_delay),
+ };
+ struct i915_perf *perf = &i915->perf;
+
+ if (!perf->metrics_kobj || !perf->ops.enable_metric_set)
+ return 0;
+
+ if (intel_gt_is_wedged(&i915->gt))
+ return 0;
+
+ return i915_subtests(tests, i915);
+}
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 0897a7b04944..30ae34f62176 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -37,6 +37,18 @@
#include "mock_drm.h"
#include "mock_gem_device.h"
+static unsigned int num_uabi_engines(struct drm_i915_private *i915)
+{
+ struct intel_engine_cs *engine;
+ unsigned int count;
+
+ count = 0;
+ for_each_uabi_engine(engine, i915)
+ count++;
+
+ return count;
+}
+
static int igt_add_request(void *arg)
{
struct drm_i915_private *i915 = arg;
@@ -281,7 +293,7 @@ static int __igt_breadcrumbs_smoketest(void *arg)
* that the fences were marked as signaled.
*/
- requests = kmalloc_array(total, sizeof(*requests), GFP_KERNEL);
+ requests = kcalloc(total, sizeof(*requests), GFP_KERNEL);
if (!requests)
return -ENOMEM;
@@ -422,12 +434,11 @@ static int mock_breadcrumbs_smoketest(void *arg)
* See __igt_breadcrumbs_smoketest();
*/
- threads = kmalloc_array(ncpus, sizeof(*threads), GFP_KERNEL);
+ threads = kcalloc(ncpus, sizeof(*threads), GFP_KERNEL);
if (!threads)
return -ENOMEM;
- t.contexts =
- kmalloc_array(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL);
+ t.contexts = kcalloc(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL);
if (!t.contexts) {
ret = -ENOMEM;
goto out_threads;
@@ -511,15 +522,15 @@ static int live_nop_request(void *arg)
struct drm_i915_private *i915 = arg;
struct intel_engine_cs *engine;
struct igt_live_test t;
- unsigned int id;
int err = -ENODEV;
- /* Submit various sized batches of empty requests, to each engine
+ /*
+ * Submit various sized batches of empty requests, to each engine
* (individually), and wait for the batch to complete. We can check
* the overhead of submitting requests to the hardware.
*/
- for_each_engine(engine, i915, id) {
+ for_each_uabi_engine(engine, i915) {
unsigned long n, prime;
IGT_TIMEOUT(end_time);
ktime_t times[2] = {};
@@ -539,7 +550,8 @@ static int live_nop_request(void *arg)
if (IS_ERR(request))
return PTR_ERR(request);
- /* This space is left intentionally blank.
+ /*
+ * This space is left intentionally blank.
*
* We do not actually want to perform any
* action with this request, we just want
@@ -657,10 +669,10 @@ static int live_empty_request(void *arg)
struct intel_engine_cs *engine;
struct igt_live_test t;
struct i915_vma *batch;
- unsigned int id;
int err = 0;
- /* Submit various sized batches of empty requests, to each engine
+ /*
+ * Submit various sized batches of empty requests, to each engine
* (individually), and wait for the batch to complete. We can check
* the overhead of submitting requests to the hardware.
*/
@@ -669,7 +681,7 @@ static int live_empty_request(void *arg)
if (IS_ERR(batch))
return PTR_ERR(batch);
- for_each_engine(engine, i915, id) {
+ for_each_uabi_engine(engine, i915) {
IGT_TIMEOUT(end_time);
struct i915_request *request;
unsigned long n, prime;
@@ -801,63 +813,73 @@ static int recursive_batch_resolve(struct i915_vma *batch)
static int live_all_engines(void *arg)
{
struct drm_i915_private *i915 = arg;
+ const unsigned int nengines = num_uabi_engines(i915);
struct intel_engine_cs *engine;
- struct i915_request *request[I915_NUM_ENGINES];
+ struct i915_request **request;
struct igt_live_test t;
struct i915_vma *batch;
- unsigned int id;
+ unsigned int idx;
int err;
- /* Check we can submit requests to all engines simultaneously. We
+ /*
+ * Check we can submit requests to all engines simultaneously. We
* send a recursive batch to each engine - checking that we don't
* block doing so, and that they don't complete too soon.
*/
+ request = kcalloc(nengines, sizeof(*request), GFP_KERNEL);
+ if (!request)
+ return -ENOMEM;
+
err = igt_live_test_begin(&t, i915, __func__, "");
if (err)
- return err;
+ goto out_free;
batch = recursive_batch(i915);
if (IS_ERR(batch)) {
err = PTR_ERR(batch);
pr_err("%s: Unable to create batch, err=%d\n", __func__, err);
- return err;
+ goto out_free;
}
- for_each_engine(engine, i915, id) {
- request[id] = i915_request_create(engine->kernel_context);
- if (IS_ERR(request[id])) {
- err = PTR_ERR(request[id]);
+ idx = 0;
+ for_each_uabi_engine(engine, i915) {
+ request[idx] = i915_request_create(engine->kernel_context);
+ if (IS_ERR(request[idx])) {
+ err = PTR_ERR(request[idx]);
pr_err("%s: Request allocation failed with err=%d\n",
__func__, err);
goto out_request;
}
- err = engine->emit_bb_start(request[id],
+ err = engine->emit_bb_start(request[idx],
batch->node.start,
batch->node.size,
0);
GEM_BUG_ON(err);
- request[id]->batch = batch;
+ request[idx]->batch = batch;
i915_vma_lock(batch);
- err = i915_request_await_object(request[id], batch->obj, 0);
+ err = i915_request_await_object(request[idx], batch->obj, 0);
if (err == 0)
- err = i915_vma_move_to_active(batch, request[id], 0);
+ err = i915_vma_move_to_active(batch, request[idx], 0);
i915_vma_unlock(batch);
GEM_BUG_ON(err);
- i915_request_get(request[id]);
- i915_request_add(request[id]);
+ i915_request_get(request[idx]);
+ i915_request_add(request[idx]);
+ idx++;
}
- for_each_engine(engine, i915, id) {
- if (i915_request_completed(request[id])) {
+ idx = 0;
+ for_each_uabi_engine(engine, i915) {
+ if (i915_request_completed(request[idx])) {
pr_err("%s(%s): request completed too early!\n",
__func__, engine->name);
err = -EINVAL;
goto out_request;
}
+ idx++;
}
err = recursive_batch_resolve(batch);
@@ -866,10 +888,11 @@ static int live_all_engines(void *arg)
goto out_request;
}
- for_each_engine(engine, i915, id) {
+ idx = 0;
+ for_each_uabi_engine(engine, i915) {
long timeout;
- timeout = i915_request_wait(request[id], 0,
+ timeout = i915_request_wait(request[idx], 0,
MAX_SCHEDULE_TIMEOUT);
if (timeout < 0) {
err = timeout;
@@ -878,43 +901,56 @@ static int live_all_engines(void *arg)
goto out_request;
}
- GEM_BUG_ON(!i915_request_completed(request[id]));
- i915_request_put(request[id]);
- request[id] = NULL;
+ GEM_BUG_ON(!i915_request_completed(request[idx]));
+ i915_request_put(request[idx]);
+ request[idx] = NULL;
+ idx++;
}
err = igt_live_test_end(&t);
out_request:
- for_each_engine(engine, i915, id)
- if (request[id])
- i915_request_put(request[id]);
+ idx = 0;
+ for_each_uabi_engine(engine, i915) {
+ if (request[idx])
+ i915_request_put(request[idx]);
+ idx++;
+ }
i915_vma_unpin(batch);
i915_vma_put(batch);
+out_free:
+ kfree(request);
return err;
}
static int live_sequential_engines(void *arg)
{
struct drm_i915_private *i915 = arg;
- struct i915_request *request[I915_NUM_ENGINES] = {};
+ const unsigned int nengines = num_uabi_engines(i915);
+ struct i915_request **request;
struct i915_request *prev = NULL;
struct intel_engine_cs *engine;
struct igt_live_test t;
- unsigned int id;
+ unsigned int idx;
int err;
- /* Check we can submit requests to all engines sequentially, such
+ /*
+ * Check we can submit requests to all engines sequentially, such
* that each successive request waits for the earlier ones. This
* tests that we don't execute requests out of order, even though
* they are running on independent engines.
*/
+ request = kcalloc(nengines, sizeof(*request), GFP_KERNEL);
+ if (!request)
+ return -ENOMEM;
+
err = igt_live_test_begin(&t, i915, __func__, "");
if (err)
- return err;
+ goto out_free;
- for_each_engine(engine, i915, id) {
+ idx = 0;
+ for_each_uabi_engine(engine, i915) {
struct i915_vma *batch;
batch = recursive_batch(i915);
@@ -922,66 +958,69 @@ static int live_sequential_engines(void *arg)
err = PTR_ERR(batch);
pr_err("%s: Unable to create batch for %s, err=%d\n",
__func__, engine->name, err);
- return err;
+ goto out_free;
}
- request[id] = i915_request_create(engine->kernel_context);
- if (IS_ERR(request[id])) {
- err = PTR_ERR(request[id]);
+ request[idx] = i915_request_create(engine->kernel_context);
+ if (IS_ERR(request[idx])) {
+ err = PTR_ERR(request[idx]);
pr_err("%s: Request allocation failed for %s with err=%d\n",
__func__, engine->name, err);
goto out_request;
}
if (prev) {
- err = i915_request_await_dma_fence(request[id],
+ err = i915_request_await_dma_fence(request[idx],
&prev->fence);
if (err) {
- i915_request_add(request[id]);
+ i915_request_add(request[idx]);
pr_err("%s: Request await failed for %s with err=%d\n",
__func__, engine->name, err);
goto out_request;
}
}
- err = engine->emit_bb_start(request[id],
+ err = engine->emit_bb_start(request[idx],
batch->node.start,
batch->node.size,
0);
GEM_BUG_ON(err);
- request[id]->batch = batch;
+ request[idx]->batch = batch;
i915_vma_lock(batch);
- err = i915_request_await_object(request[id], batch->obj, false);
+ err = i915_request_await_object(request[idx],
+ batch->obj, false);
if (err == 0)
- err = i915_vma_move_to_active(batch, request[id], 0);
+ err = i915_vma_move_to_active(batch, request[idx], 0);
i915_vma_unlock(batch);
GEM_BUG_ON(err);
- i915_request_get(request[id]);
- i915_request_add(request[id]);
+ i915_request_get(request[idx]);
+ i915_request_add(request[idx]);
- prev = request[id];
+ prev = request[idx];
+ idx++;
}
- for_each_engine(engine, i915, id) {
+ idx = 0;
+ for_each_uabi_engine(engine, i915) {
long timeout;
- if (i915_request_completed(request[id])) {
+ if (i915_request_completed(request[idx])) {
pr_err("%s(%s): request completed too early!\n",
__func__, engine->name);
err = -EINVAL;
goto out_request;
}
- err = recursive_batch_resolve(request[id]->batch);
+ err = recursive_batch_resolve(request[idx]->batch);
if (err) {
pr_err("%s: failed to resolve batch, err=%d\n",
__func__, err);
goto out_request;
}
- timeout = i915_request_wait(request[id], 0,
+ timeout = i915_request_wait(request[idx], 0,
MAX_SCHEDULE_TIMEOUT);
if (timeout < 0) {
err = timeout;
@@ -990,30 +1029,35 @@ static int live_sequential_engines(void *arg)
goto out_request;
}
- GEM_BUG_ON(!i915_request_completed(request[id]));
+ GEM_BUG_ON(!i915_request_completed(request[idx]));
+ idx++;
}
err = igt_live_test_end(&t);
out_request:
- for_each_engine(engine, i915, id) {
+ idx = 0;
+ for_each_uabi_engine(engine, i915) {
u32 *cmd;
- if (!request[id])
+ if (!request[idx])
break;
- cmd = i915_gem_object_pin_map(request[id]->batch->obj,
+ cmd = i915_gem_object_pin_map(request[idx]->batch->obj,
I915_MAP_WC);
if (!IS_ERR(cmd)) {
*cmd = MI_BATCH_BUFFER_END;
intel_gt_chipset_flush(engine->gt);
- i915_gem_object_unpin_map(request[id]->batch->obj);
+ i915_gem_object_unpin_map(request[idx]->batch->obj);
}
- i915_vma_put(request[id]->batch);
- i915_request_put(request[id]);
+ i915_vma_put(request[idx]->batch);
+ i915_request_put(request[idx]);
+ idx++;
}
+out_free:
+ kfree(request);
return err;
}
@@ -1079,9 +1123,10 @@ static int live_parallel_engines(void *arg)
__live_parallel_engineN,
NULL,
};
+ const unsigned int nengines = num_uabi_engines(i915);
struct intel_engine_cs *engine;
- enum intel_engine_id id;
int (* const *fn)(void *arg);
+ struct task_struct **tsk;
int err = 0;
/*
@@ -1089,42 +1134,49 @@ static int live_parallel_engines(void *arg)
* tests that we load up the system maximally.
*/
+ tsk = kcalloc(nengines, sizeof(*tsk), GFP_KERNEL);
+ if (!tsk)
+ return -ENOMEM;
+
for (fn = func; !err && *fn; fn++) {
- struct task_struct *tsk[I915_NUM_ENGINES] = {};
struct igt_live_test t;
+ unsigned int idx;
err = igt_live_test_begin(&t, i915, __func__, "");
if (err)
break;
- for_each_engine(engine, i915, id) {
- tsk[id] = kthread_run(*fn, engine,
- "igt/parallel:%s",
- engine->name);
- if (IS_ERR(tsk[id])) {
- err = PTR_ERR(tsk[id]);
+ idx = 0;
+ for_each_uabi_engine(engine, i915) {
+ tsk[idx] = kthread_run(*fn, engine,
+ "igt/parallel:%s",
+ engine->name);
+ if (IS_ERR(tsk[idx])) {
+ err = PTR_ERR(tsk[idx]);
break;
}
- get_task_struct(tsk[id]);
+ get_task_struct(tsk[idx++]);
}
- for_each_engine(engine, i915, id) {
+ idx = 0;
+ for_each_uabi_engine(engine, i915) {
int status;
- if (IS_ERR_OR_NULL(tsk[id]))
- continue;
+ if (IS_ERR(tsk[idx]))
+ break;
- status = kthread_stop(tsk[id]);
+ status = kthread_stop(tsk[idx]);
if (status && !err)
err = status;
- put_task_struct(tsk[id]);
+ put_task_struct(tsk[idx++]);
}
if (igt_live_test_end(&t))
err = -EIO;
}
+ kfree(tsk);
return err;
}
@@ -1168,16 +1220,16 @@ max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
static int live_breadcrumbs_smoketest(void *arg)
{
struct drm_i915_private *i915 = arg;
- struct smoketest t[I915_NUM_ENGINES];
- unsigned int ncpus = num_online_cpus();
+ const unsigned int nengines = num_uabi_engines(i915);
+ const unsigned int ncpus = num_online_cpus();
unsigned long num_waits, num_fences;
struct intel_engine_cs *engine;
struct task_struct **threads;
struct igt_live_test live;
- enum intel_engine_id id;
intel_wakeref_t wakeref;
struct drm_file *file;
- unsigned int n;
+ struct smoketest *smoke;
+ unsigned int n, idx;
int ret = 0;
/*
@@ -1196,28 +1248,31 @@ static int live_breadcrumbs_smoketest(void *arg)
goto out_rpm;
}
- threads = kcalloc(ncpus * I915_NUM_ENGINES,
- sizeof(*threads),
- GFP_KERNEL);
- if (!threads) {
+ smoke = kcalloc(nengines, sizeof(*smoke), GFP_KERNEL);
+ if (!smoke) {
ret = -ENOMEM;
goto out_file;
}
- memset(&t[0], 0, sizeof(t[0]));
- t[0].request_alloc = __live_request_alloc;
- t[0].ncontexts = 64;
- t[0].contexts = kmalloc_array(t[0].ncontexts,
- sizeof(*t[0].contexts),
- GFP_KERNEL);
- if (!t[0].contexts) {
+ threads = kcalloc(ncpus * nengines, sizeof(*threads), GFP_KERNEL);
+ if (!threads) {
+ ret = -ENOMEM;
+ goto out_smoke;
+ }
+
+ smoke[0].request_alloc = __live_request_alloc;
+ smoke[0].ncontexts = 64;
+ smoke[0].contexts = kcalloc(smoke[0].ncontexts,
+ sizeof(*smoke[0].contexts),
+ GFP_KERNEL);
+ if (!smoke[0].contexts) {
ret = -ENOMEM;
goto out_threads;
}
- for (n = 0; n < t[0].ncontexts; n++) {
- t[0].contexts[n] = live_context(i915, file);
- if (!t[0].contexts[n]) {
+ for (n = 0; n < smoke[0].ncontexts; n++) {
+ smoke[0].contexts[n] = live_context(i915, file);
+ if (!smoke[0].contexts[n]) {
ret = -ENOMEM;
goto out_contexts;
}
@@ -1227,42 +1282,47 @@ static int live_breadcrumbs_smoketest(void *arg)
if (ret)
goto out_contexts;
- for_each_engine(engine, i915, id) {
- t[id] = t[0];
- t[id].engine = engine;
- t[id].max_batch = max_batches(t[0].contexts[0], engine);
- if (t[id].max_batch < 0) {
- ret = t[id].max_batch;
+ idx = 0;
+ for_each_uabi_engine(engine, i915) {
+ smoke[idx] = smoke[0];
+ smoke[idx].engine = engine;
+ smoke[idx].max_batch =
+ max_batches(smoke[0].contexts[0], engine);
+ if (smoke[idx].max_batch < 0) {
+ ret = smoke[idx].max_batch;
goto out_flush;
}
/* One ring interleaved between requests from all cpus */
- t[id].max_batch /= num_online_cpus() + 1;
+ smoke[idx].max_batch /= num_online_cpus() + 1;
pr_debug("Limiting batches to %d requests on %s\n",
- t[id].max_batch, engine->name);
+ smoke[idx].max_batch, engine->name);
for (n = 0; n < ncpus; n++) {
struct task_struct *tsk;
tsk = kthread_run(__igt_breadcrumbs_smoketest,
- &t[id], "igt/%d.%d", id, n);
+ &smoke[idx], "igt/%d.%d", idx, n);
if (IS_ERR(tsk)) {
ret = PTR_ERR(tsk);
goto out_flush;
}
get_task_struct(tsk);
- threads[id * ncpus + n] = tsk;
+ threads[idx * ncpus + n] = tsk;
}
+
+ idx++;
}
msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
out_flush:
+ idx = 0;
num_waits = 0;
num_fences = 0;
- for_each_engine(engine, i915, id) {
+ for_each_uabi_engine(engine, i915) {
for (n = 0; n < ncpus; n++) {
- struct task_struct *tsk = threads[id * ncpus + n];
+ struct task_struct *tsk = threads[idx * ncpus + n];
int err;
if (!tsk)
@@ -1275,17 +1335,20 @@ out_flush:
put_task_struct(tsk);
}
- num_waits += atomic_long_read(&t[id].num_waits);
- num_fences += atomic_long_read(&t[id].num_fences);
+ num_waits += atomic_long_read(&smoke[idx].num_waits);
+ num_fences += atomic_long_read(&smoke[idx].num_fences);
+ idx++;
}
pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n",
num_waits, num_fences, RUNTIME_INFO(i915)->num_engines, ncpus);
ret = igt_live_test_end(&live) ?: ret;
out_contexts:
- kfree(t[0].contexts);
+ kfree(smoke[0].contexts);
out_threads:
kfree(threads);
+out_smoke:
+ kfree(smoke);
out_file:
mock_file_free(i915, file);
out_rpm:
diff --git a/drivers/gpu/drm/i915/selftests/igt_reset.c b/drivers/gpu/drm/i915/selftests/igt_reset.c
index 7ec8f8b049c6..9f8590b868a9 100644
--- a/drivers/gpu/drm/i915/selftests/igt_reset.c
+++ b/drivers/gpu/drm/i915/selftests/igt_reset.c
@@ -22,7 +22,7 @@ void igt_global_reset_lock(struct intel_gt *gt)
wait_event(gt->reset.queue,
!test_bit(I915_RESET_BACKOFF, &gt->reset.flags));
- for_each_engine(engine, gt->i915, id) {
+ for_each_engine(engine, gt, id) {
while (test_and_set_bit(I915_RESET_ENGINE + id,
&gt->reset.flags))
wait_on_bit(&gt->reset.flags, I915_RESET_ENGINE + id,
@@ -35,7 +35,7 @@ void igt_global_reset_unlock(struct intel_gt *gt)
struct intel_engine_cs *engine;
enum intel_engine_id id;
- for_each_engine(engine, gt->i915, id)
+ for_each_engine(engine, gt, id)
clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
clear_bit(I915_RESET_BACKOFF, &gt->reset.flags);
diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
new file mode 100644
index 000000000000..56091e7e599e
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
@@ -0,0 +1,282 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/prime_numbers.h>
+
+#include "../i915_selftest.h"
+
+#include "mock_drm.h"
+#include "mock_gem_device.h"
+#include "mock_region.h"
+
+#include "gem/i915_gem_region.h"
+#include "gem/selftests/mock_context.h"
+#include "selftests/i915_random.h"
+
+static void close_objects(struct intel_memory_region *mem,
+ struct list_head *objects)
+{
+ struct drm_i915_private *i915 = mem->i915;
+ struct drm_i915_gem_object *obj, *on;
+
+ list_for_each_entry_safe(obj, on, objects, st_link) {
+ if (i915_gem_object_has_pinned_pages(obj))
+ i915_gem_object_unpin_pages(obj);
+ /* Avoid polluting the memory region between tests */
+ __i915_gem_object_put_pages(obj, I915_MM_NORMAL);
+ list_del(&obj->st_link);
+ i915_gem_object_put(obj);
+ }
+
+ cond_resched();
+
+ i915_gem_drain_freed_objects(i915);
+}
+
+static int igt_mock_fill(void *arg)
+{
+ struct intel_memory_region *mem = arg;
+ resource_size_t total = resource_size(&mem->region);
+ resource_size_t page_size;
+ resource_size_t rem;
+ unsigned long max_pages;
+ unsigned long page_num;
+ LIST_HEAD(objects);
+ int err = 0;
+
+ page_size = mem->mm.chunk_size;
+ max_pages = div64_u64(total, page_size);
+ rem = total;
+
+ for_each_prime_number_from(page_num, 1, max_pages) {
+ resource_size_t size = page_num * page_size;
+ struct drm_i915_gem_object *obj;
+
+ obj = i915_gem_object_create_region(mem, size, 0);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ break;
+ }
+
+ err = i915_gem_object_pin_pages(obj);
+ if (err) {
+ i915_gem_object_put(obj);
+ break;
+ }
+
+ list_add(&obj->st_link, &objects);
+ rem -= size;
+ }
+
+ if (err == -ENOMEM)
+ err = 0;
+ if (err == -ENXIO) {
+ if (page_num * page_size <= rem) {
+ pr_err("%s failed, space still left in region\n",
+ __func__);
+ err = -EINVAL;
+ } else {
+ err = 0;
+ }
+ }
+
+ close_objects(mem, &objects);
+
+ return err;
+}
+
+static struct drm_i915_gem_object *
+igt_object_create(struct intel_memory_region *mem,
+ struct list_head *objects,
+ u64 size,
+ unsigned int flags)
+{
+ struct drm_i915_gem_object *obj;
+ int err;
+
+ obj = i915_gem_object_create_region(mem, size, flags);
+ if (IS_ERR(obj))
+ return obj;
+
+ err = i915_gem_object_pin_pages(obj);
+ if (err)
+ goto put;
+
+ list_add(&obj->st_link, objects);
+ return obj;
+
+put:
+ i915_gem_object_put(obj);
+ return ERR_PTR(err);
+}
+
+static void igt_object_release(struct drm_i915_gem_object *obj)
+{
+ i915_gem_object_unpin_pages(obj);
+ __i915_gem_object_put_pages(obj, I915_MM_NORMAL);
+ list_del(&obj->st_link);
+ i915_gem_object_put(obj);
+}
+
+static int igt_mock_contiguous(void *arg)
+{
+ struct intel_memory_region *mem = arg;
+ struct drm_i915_gem_object *obj;
+ unsigned long n_objects;
+ LIST_HEAD(objects);
+ LIST_HEAD(holes);
+ I915_RND_STATE(prng);
+ resource_size_t total;
+ resource_size_t min;
+ u64 target;
+ int err = 0;
+
+ total = resource_size(&mem->region);
+
+ /* Min size */
+ obj = igt_object_create(mem, &objects, mem->mm.chunk_size,
+ I915_BO_ALLOC_CONTIGUOUS);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ if (obj->mm.pages->nents != 1) {
+ pr_err("%s min object spans multiple sg entries\n", __func__);
+ err = -EINVAL;
+ goto err_close_objects;
+ }
+
+ igt_object_release(obj);
+
+ /* Max size */
+ obj = igt_object_create(mem, &objects, total, I915_BO_ALLOC_CONTIGUOUS);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ if (obj->mm.pages->nents != 1) {
+ pr_err("%s max object spans multiple sg entries\n", __func__);
+ err = -EINVAL;
+ goto err_close_objects;
+ }
+
+ igt_object_release(obj);
+
+ /* Internal fragmentation should not bleed into the object size */
+ target = i915_prandom_u64_state(&prng);
+ div64_u64_rem(target, total, &target);
+ target = round_up(target, PAGE_SIZE);
+ target = max_t(u64, PAGE_SIZE, target);
+
+ obj = igt_object_create(mem, &objects, target,
+ I915_BO_ALLOC_CONTIGUOUS);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ if (obj->base.size != target) {
+ pr_err("%s obj->base.size(%zx) != target(%llx)\n", __func__,
+ obj->base.size, target);
+ err = -EINVAL;
+ goto err_close_objects;
+ }
+
+ if (obj->mm.pages->nents != 1) {
+ pr_err("%s object spans multiple sg entries\n", __func__);
+ err = -EINVAL;
+ goto err_close_objects;
+ }
+
+ igt_object_release(obj);
+
+ /*
+ * Try to fragment the address space, such that half of it is free, but
+ * the max contiguous block size is SZ_64K.
+ */
+
+ target = SZ_64K;
+ n_objects = div64_u64(total, target);
+
+ while (n_objects--) {
+ struct list_head *list;
+
+ if (n_objects % 2)
+ list = &holes;
+ else
+ list = &objects;
+
+ obj = igt_object_create(mem, list, target,
+ I915_BO_ALLOC_CONTIGUOUS);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ goto err_close_objects;
+ }
+ }
+
+ close_objects(mem, &holes);
+
+ min = target;
+ target = total >> 1;
+
+ /* Make sure we can still allocate all the fragmented space */
+ obj = igt_object_create(mem, &objects, target, 0);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ goto err_close_objects;
+ }
+
+ igt_object_release(obj);
+
+ /*
+ * Even though we have enough free space, we don't have a big enough
+ * contiguous block. Make sure that holds true.
+ */
+
+ do {
+ bool should_fail = target > min;
+
+ obj = igt_object_create(mem, &objects, target,
+ I915_BO_ALLOC_CONTIGUOUS);
+ if (should_fail != IS_ERR(obj)) {
+ pr_err("%s target allocation(%llx) mismatch\n",
+ __func__, target);
+ err = -EINVAL;
+ goto err_close_objects;
+ }
+
+ target >>= 1;
+ } while (target >= mem->mm.chunk_size);
+
+err_close_objects:
+ list_splice_tail(&holes, &objects);
+ close_objects(mem, &objects);
+ return err;
+}
+
+int intel_memory_region_mock_selftests(void)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(igt_mock_fill),
+ SUBTEST(igt_mock_contiguous),
+ };
+ struct intel_memory_region *mem;
+ struct drm_i915_private *i915;
+ int err;
+
+ i915 = mock_gem_device();
+ if (!i915)
+ return -ENOMEM;
+
+ mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0);
+ if (IS_ERR(mem)) {
+ pr_err("failed to create memory region\n");
+ err = PTR_ERR(mem);
+ goto out_unref;
+ }
+
+ err = i915_subtests(tests, mem);
+
+ intel_memory_region_put(mem);
+out_unref:
+ drm_dev_put(&i915->drm);
+ return err;
+}
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 70a7026db08d..cb8c3a501cc7 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -33,6 +33,7 @@
#include "mock_gem_device.h"
#include "mock_gtt.h"
#include "mock_uncore.h"
+#include "mock_region.h"
#include "gem/selftests/mock_context.h"
#include "gem/selftests/mock_gem_object.h"
@@ -71,7 +72,7 @@ static void mock_device_release(struct drm_device *dev)
mock_fini_ggtt(&i915->ggtt);
destroy_workqueue(i915->wq);
- i915_gemfs_fini(i915);
+ i915_gem_cleanup_memory_regions(i915);
drm_mode_config_cleanup(&i915->drm);
@@ -162,7 +163,10 @@ struct drm_i915_private *mock_gem_device(void)
I915_GTT_PAGE_SIZE_64K |
I915_GTT_PAGE_SIZE_2M;
- mock_uncore_init(&i915->uncore);
+ mkwrite_device_info(i915)->memory_regions = REGION_SMEM;
+
+ mock_uncore_init(&i915->uncore, i915);
+
i915_gem_init__mm(i915);
intel_gt_init_early(&i915->gt, i915);
atomic_inc(&i915->gt.wakeref.count); /* disable; no hw support */
@@ -193,7 +197,9 @@ struct drm_i915_private *mock_gem_device(void)
intel_engines_driver_register(i915);
- WARN_ON(i915_gemfs_init(i915));
+ err = i915_gem_init_memory_regions(i915);
+ if (err)
+ goto err_context;
return i915;
diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c
new file mode 100644
index 000000000000..7b0c99ddc2d5
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/mock_region.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "gem/i915_gem_region.h"
+#include "intel_memory_region.h"
+
+#include "mock_region.h"
+
+static const struct drm_i915_gem_object_ops mock_region_obj_ops = {
+ .get_pages = i915_gem_object_get_pages_buddy,
+ .put_pages = i915_gem_object_put_pages_buddy,
+ .release = i915_gem_object_release_memory_region,
+};
+
+static struct drm_i915_gem_object *
+mock_object_create(struct intel_memory_region *mem,
+ resource_size_t size,
+ unsigned int flags)
+{
+ struct drm_i915_private *i915 = mem->i915;
+ struct drm_i915_gem_object *obj;
+
+ if (size > BIT(mem->mm.max_order) * mem->mm.chunk_size)
+ return ERR_PTR(-E2BIG);
+
+ obj = i915_gem_object_alloc();
+ if (!obj)
+ return ERR_PTR(-ENOMEM);
+
+ drm_gem_private_object_init(&i915->drm, &obj->base, size);
+ i915_gem_object_init(obj, &mock_region_obj_ops);
+
+ obj->read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT;
+
+ i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE);
+
+ i915_gem_object_init_memory_region(obj, mem, flags);
+
+ return obj;
+}
+
+static const struct intel_memory_region_ops mock_region_ops = {
+ .init = intel_memory_region_init_buddy,
+ .release = intel_memory_region_release_buddy,
+ .create_object = mock_object_create,
+};
+
+struct intel_memory_region *
+mock_region_create(struct drm_i915_private *i915,
+ resource_size_t start,
+ resource_size_t size,
+ resource_size_t min_page_size,
+ resource_size_t io_start)
+{
+ return intel_memory_region_create(i915, start, size, min_page_size,
+ io_start, &mock_region_ops);
+}
diff --git a/drivers/gpu/drm/i915/selftests/mock_region.h b/drivers/gpu/drm/i915/selftests/mock_region.h
new file mode 100644
index 000000000000..24608089d833
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/mock_region.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __MOCK_REGION_H
+#define __MOCK_REGION_H
+
+struct intel_memory_region *
+mock_region_create(struct drm_i915_private *i915,
+ resource_size_t start,
+ resource_size_t size,
+ resource_size_t min_page_size,
+ resource_size_t io_start);
+
+#endif /* !__MOCK_REGION_H */
diff --git a/drivers/gpu/drm/i915/selftests/mock_uncore.c b/drivers/gpu/drm/i915/selftests/mock_uncore.c
index 49585f16d4a2..ca57e4008701 100644
--- a/drivers/gpu/drm/i915/selftests/mock_uncore.c
+++ b/drivers/gpu/drm/i915/selftests/mock_uncore.c
@@ -39,8 +39,11 @@ __nop_read(16)
__nop_read(32)
__nop_read(64)
-void mock_uncore_init(struct intel_uncore *uncore)
+void mock_uncore_init(struct intel_uncore *uncore,
+ struct drm_i915_private *i915)
{
+ intel_uncore_init_early(uncore, i915);
+
ASSIGN_RAW_WRITE_MMIO_VFUNCS(uncore, nop);
ASSIGN_RAW_READ_MMIO_VFUNCS(uncore, nop);
}
diff --git a/drivers/gpu/drm/i915/selftests/mock_uncore.h b/drivers/gpu/drm/i915/selftests/mock_uncore.h
index dacb36b5ffcd..8a2cc553f466 100644
--- a/drivers/gpu/drm/i915/selftests/mock_uncore.h
+++ b/drivers/gpu/drm/i915/selftests/mock_uncore.h
@@ -25,6 +25,7 @@
#ifndef __MOCK_UNCORE_H
#define __MOCK_UNCORE_H
-void mock_uncore_init(struct intel_uncore *uncore);
+void mock_uncore_init(struct intel_uncore *uncore,
+ struct drm_i915_private *i915);
#endif /* !__MOCK_UNCORE_H */
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 30c542144016..63d40cba97e0 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -611,6 +611,13 @@ typedef struct drm_i915_irq_wait {
* See I915_EXEC_FENCE_OUT and I915_EXEC_FENCE_SUBMIT.
*/
#define I915_PARAM_HAS_EXEC_SUBMIT_FENCE 53
+
+/*
+ * Revision of the i915-perf uAPI. The value returned helps determine what
+ * i915-perf features are available. See drm_i915_perf_property_id.
+ */
+#define I915_PARAM_PERF_REVISION 54
+
/* Must be kept compact -- no holes and well documented */
typedef struct drm_i915_getparam {
@@ -1844,23 +1851,31 @@ enum drm_i915_perf_property_id {
* Open the stream for a specific context handle (as used with
* execbuffer2). A stream opened for a specific context this way
* won't typically require root privileges.
+ *
+ * This property is available in perf revision 1.
*/
DRM_I915_PERF_PROP_CTX_HANDLE = 1,
/**
* A value of 1 requests the inclusion of raw OA unit reports as
* part of stream samples.
+ *
+ * This property is available in perf revision 1.
*/
DRM_I915_PERF_PROP_SAMPLE_OA,
/**
* The value specifies which set of OA unit metrics should be
* configured, defining the contents of any OA unit reports.
+ *
+ * This property is available in perf revision 1.
*/
DRM_I915_PERF_PROP_OA_METRICS_SET,
/**
* The value specifies the size and layout of OA unit reports.
+ *
+ * This property is available in perf revision 1.
*/
DRM_I915_PERF_PROP_OA_FORMAT,
@@ -1870,9 +1885,22 @@ enum drm_i915_perf_property_id {
* from this exponent as follows:
*
* 80ns * 2^(period_exponent + 1)
+ *
+ * This property is available in perf revision 1.
*/
DRM_I915_PERF_PROP_OA_EXPONENT,
+ /**
+ * Specifying this property is only valid when specifying a context to
+ * filter with DRM_I915_PERF_PROP_CTX_HANDLE. Specifying this property
+ * will hold preemption of the particular context we want to gather
+ * performance data about. The execbuf2 submissions must include a
+ * drm_i915_gem_execbuffer_ext_perf parameter for this to apply.
+ *
+ * This property is available in perf revision 3.
+ */
+ DRM_I915_PERF_PROP_HOLD_PREEMPTION,
+
DRM_I915_PERF_PROP_MAX /* non-ABI */
};
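As a rough userspace illustration of the property above: properties are handed to DRM_IOCTL_I915_PERF_OPEN as (key, value) u64 pairs behind properties_ptr, so holding preemption is simply one more pair next to the usual OA stream properties. This is a hedged sketch only; drm_fd, ctx_id and metrics_set are placeholders the caller is assumed to have obtained elsewhere, and, per the comment above, the context's execbuf2 submissions still need the drm_i915_gem_execbuffer_ext_perf extension for the hold to take effect.

/* Sketch: open an OA stream filtered to one context and ask i915 to hold
 * preemption of that context while sampling (perf revision 3+).
 */
uint64_t properties[] = {
	DRM_I915_PERF_PROP_CTX_HANDLE,      ctx_id,      /* assumed GEM context id */
	DRM_I915_PERF_PROP_SAMPLE_OA,       1,
	DRM_I915_PERF_PROP_OA_METRICS_SET,  metrics_set, /* assumed config id */
	DRM_I915_PERF_PROP_OA_FORMAT,       I915_OA_FORMAT_C4_B8,
	DRM_I915_PERF_PROP_OA_EXPONENT,     16,          /* arbitrary sampling period */
	DRM_I915_PERF_PROP_HOLD_PREEMPTION, 1,
};
struct drm_i915_perf_open_param param = {
	.flags = I915_PERF_FLAG_FD_CLOEXEC | I915_PERF_FLAG_FD_NONBLOCK,
	.num_properties = sizeof(properties) / (2 * sizeof(uint64_t)),
	.properties_ptr = (uintptr_t)properties,
};
int stream_fd = ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);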
@@ -1901,6 +1929,8 @@ struct drm_i915_perf_open_param {
* to close and re-open a stream with the same configuration.
*
* It's undefined whether any pending data for the stream will be lost.
+ *
+ * This ioctl is available in perf revision 1.
*/
#define I915_PERF_IOCTL_ENABLE _IO('i', 0x0)
@@ -1908,10 +1938,25 @@ struct drm_i915_perf_open_param {
* Disable data capture for a stream.
*
* It is an error to try and read a stream that is disabled.
+ *
+ * This ioctl is available in perf revision 1.
*/
#define I915_PERF_IOCTL_DISABLE _IO('i', 0x1)
/**
+ * Change metrics_set captured by a stream.
+ *
+ * If the stream is bound to a specific context, the configuration change
+ * will be performed inline with that context such that it takes effect before
+ * the next execbuf submission.
+ *
+ * Returns the previously bound metrics set id, or a negative error code.
+ *
+ * This ioctl is available in perf revision 2.
+ */
+#define I915_PERF_IOCTL_CONFIG _IO('i', 0x2)
+
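A hedged usage note for the new ioctl: the argument is the metrics set id itself (not a pointer to it), and the previously bound id comes back as the return value. stream_fd and new_metrics_set below are assumed to exist already, the latter typically registered beforehand through DRM_IOCTL_I915_PERF_ADD_CONFIG.

/* Sketch: switch the OA configuration of a live stream (perf revision 2+). */
long prev = ioctl(stream_fd, I915_PERF_IOCTL_CONFIG,
		  (void *)(uintptr_t)new_metrics_set);
if (prev < 0)
	perror("I915_PERF_IOCTL_CONFIG");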
+/**
* Common to all i915 perf records
*/
struct drm_i915_perf_record_header {
@@ -1984,6 +2029,7 @@ struct drm_i915_query_item {
__u64 query_id;
#define DRM_I915_QUERY_TOPOLOGY_INFO 1
#define DRM_I915_QUERY_ENGINE_INFO 2
+#define DRM_I915_QUERY_PERF_CONFIG 3
/* Must be kept compact -- no holes and well documented */
/*
@@ -1995,9 +2041,18 @@ struct drm_i915_query_item {
__s32 length;
/*
- * Unused for now. Must be cleared to zero.
+ * When query_id == DRM_I915_QUERY_TOPOLOGY_INFO, must be 0.
+ *
+ * When query_id == DRM_I915_QUERY_PERF_CONFIG, must be one of the
+ * following:
+ * - DRM_I915_QUERY_PERF_CONFIG_LIST
+ * - DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID
+ * - DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID
*/
__u32 flags;
+#define DRM_I915_QUERY_PERF_CONFIG_LIST 1
+#define DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID 2
+#define DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID 3
/*
* Data will be written at the location pointed by data_ptr when the
@@ -2125,6 +2180,56 @@ struct drm_i915_query_engine_info {
struct drm_i915_engine_info engines[];
};
+/*
+ * Data written by the kernel with query DRM_I915_QUERY_PERF_CONFIG.
+ */
+struct drm_i915_query_perf_config {
+ union {
+ /*
+ * When query_item.flags == DRM_I915_QUERY_PERF_CONFIG_LIST, i915 sets
+ * this field to the number of configurations available.
+ */
+ __u64 n_configs;
+
+ /*
+ * When query_item.flags == DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID,
+ * i915 will use the value in this field as configuration
+ * identifier to decide what data to write into config_ptr.
+ */
+ __u64 config;
+
+ /*
+ * When query_item.flags == DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID,
+ * i915 will use the value in this field as configuration
+ * identifier to decide what data to write into config_ptr.
+ *
+ * String formatted like "%08x-%04x-%04x-%04x-%012x"
+ */
+ char uuid[36];
+ };
+
+ /*
+ * Unused for now. Must be cleared to zero.
+ */
+ __u32 flags;
+
+ /*
+ * When query_item.flags == DRM_I915_QUERY_PERF_CONFIG_LIST, i915 will
+ * write an array of __u64 of configuration identifiers.
+ *
+ * When query_item.flags == DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID or
+ * DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID, i915 will write a struct
+ * drm_i915_perf_oa_config. If the following fields of
+ * drm_i915_perf_oa_config are not set to 0, i915 will write into the
+ * associated pointers the values submitted when the configuration was
+ * created:
+ *
+ * - n_mux_regs
+ * - n_boolean_regs
+ * - n_flex_regs
+ */
+ __u8 data[];
+};
+
#if defined(__cplusplus)
}
#endif
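To round off the uAPI additions, here is a hedged end-to-end sketch of the two read-only interfaces introduced above: reading I915_PARAM_PERF_REVISION through GETPARAM, then enumerating the available OA configurations with the two-pass DRM_I915_QUERY_PERF_CONFIG_LIST query. It assumes drm_fd is an already-open i915 render node and that the updated i915_drm.h from this series is in the include path; error handling is pared down.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static void list_perf_configs(int drm_fd)
{
	int revision = 0;
	struct drm_i915_getparam gp = {
		.param = I915_PARAM_PERF_REVISION,
		.value = &revision,
	};

	/* Older kernels reject the param; revision then stays at 0. */
	ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp);
	printf("i915-perf revision: %d\n", revision);

	struct drm_i915_query_item item = {
		.query_id = DRM_I915_QUERY_PERF_CONFIG,
		.flags = DRM_I915_QUERY_PERF_CONFIG_LIST,
	};
	struct drm_i915_query query = {
		.num_items = 1,
		.items_ptr = (uintptr_t)&item,
	};

	/* First pass: item.length == 0, i915 fills in the required size. */
	if (ioctl(drm_fd, DRM_IOCTL_I915_QUERY, &query) || item.length <= 0)
		return;

	struct drm_i915_query_perf_config *configs = calloc(1, item.length);
	if (!configs)
		return;
	item.data_ptr = (uintptr_t)configs;

	/* Second pass: n_configs plus the __u64 id array land in data[]. */
	if (ioctl(drm_fd, DRM_IOCTL_I915_QUERY, &query) == 0) {
		const uint64_t *ids = (const uint64_t *)configs->data;
		uint64_t i;

		for (i = 0; i < configs->n_configs; i++)
			printf("config %llu\n", (unsigned long long)ids[i]);
	}
	free(configs);
}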