From 0b15205c7325dc20b7da0068307670d222d66949 Mon Sep 17 00:00:00 2001 From: Sonny Jiang Date: Tue, 12 Jul 2022 16:33:10 -0400 Subject: drm/amdgpu: limiting AV1 to first instance on VCN4 decode AV1 is only supported on first instance. Signed-off-by: Sonny Jiang Reviewed-by: James Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 131 ++++++++++++++++++++++++++++++++++ 1 file changed, 131 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 84ac2401895a..a91ffbf902d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -25,6 +25,7 @@ #include "amdgpu.h" #include "amdgpu_vcn.h" #include "amdgpu_pm.h" +#include "amdgpu_cs.h" #include "soc15.h" #include "soc15d.h" #include "soc15_hw_ip.h" @@ -44,6 +45,9 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 +#define RDECODE_MSG_CREATE 0x00000000 +#define RDECODE_MESSAGE_CREATE 0x00000001 + static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, SOC15_IH_CLIENTID_VCN1 @@ -1323,6 +1327,132 @@ static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring) } } +static int vcn_v4_0_limit_sched(struct amdgpu_cs_parser *p) +{ + struct drm_gpu_scheduler **scheds; + + /* The create msg must be in the first IB submitted */ + if (atomic_read(&p->entity->fence_seq)) + return -EINVAL; + + scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_ENC] + [AMDGPU_RING_PRIO_0].sched; + drm_sched_entity_modify_sched(p->entity, scheds, 1); + return 0; +} + +static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr) +{ + struct ttm_operation_ctx ctx = { false, false }; + struct amdgpu_bo_va_mapping *map; + uint32_t *msg, num_buffers; + struct amdgpu_bo *bo; + uint64_t start, end; + unsigned int i; + void *ptr; + int r; + + addr &= AMDGPU_GMC_HOLE_MASK; + r = amdgpu_cs_find_mapping(p, addr, &bo, &map); + if (r) { + DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr); + 
return r; + } + + start = map->start * AMDGPU_GPU_PAGE_SIZE; + end = (map->last + 1) * AMDGPU_GPU_PAGE_SIZE; + if (addr & 0x7) { + DRM_ERROR("VCN messages must be 8 byte aligned!\n"); + return -EINVAL; + } + + bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + amdgpu_bo_placement_from_domain(bo, bo->allowed_domains); + r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (r) { + DRM_ERROR("Failed validating the VCN message BO (%d)!\n", r); + return r; + } + + r = amdgpu_bo_kmap(bo, &ptr); + if (r) { + DRM_ERROR("Failed mapping the VCN message (%d)!\n", r); + return r; + } + + msg = ptr + addr - start; + + /* Check length */ + if (msg[1] > end - addr) { + r = -EINVAL; + goto out; + } + + if (msg[3] != RDECODE_MSG_CREATE) + goto out; + + num_buffers = msg[2]; + for (i = 0, msg = &msg[6]; i < num_buffers; ++i, msg += 4) { + uint32_t offset, size, *create; + + if (msg[0] != RDECODE_MESSAGE_CREATE) + continue; + + offset = msg[1]; + size = msg[2]; + + if (offset + size > end) { + r = -EINVAL; + goto out; + } + + create = ptr + addr + offset - start; + + /* H264, HEVC and VP9 can run on any instance */ + if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11) + continue; + + r = vcn_v4_0_limit_sched(p); + if (r) + goto out; + } + +out: + amdgpu_bo_kunmap(bo); + return r; +} + +#define RADEON_VCN_ENGINE_TYPE_DECODE (0x00000003) + +static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, + struct amdgpu_job *job, + struct amdgpu_ib *ib) +{ + struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched); + struct amdgpu_vcn_decode_buffer *decode_buffer = NULL; + uint32_t val; + int r = 0; + + /* The first instance can decode anything */ + if (!ring->me) + return r; + + /* unified queue ib header has 8 double words. 
*/ + if (ib->length_dw < 8) + return r; + + val = amdgpu_ib_get_value(ib, 6); //RADEON_VCN_ENGINE_TYPE + + if (val == RADEON_VCN_ENGINE_TYPE_DECODE) { + decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[10]; + + if (decode_buffer->valid_buf_flag & 0x1) + r = vcn_v4_0_dec_msg(p, ((u64)decode_buffer->msg_buffer_address_hi) << 32 | + decode_buffer->msg_buffer_address_lo); + } + return r; +} + static const struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, @@ -1331,6 +1461,7 @@ static const struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = { .get_rptr = vcn_v4_0_unified_ring_get_rptr, .get_wptr = vcn_v4_0_unified_ring_get_wptr, .set_wptr = vcn_v4_0_unified_ring_set_wptr, + .patch_cs_in_place = vcn_v4_0_ring_patch_cs_in_place, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + -- cgit v1.2.3 From f6a3f66063ca39e7ee5fcee59e889c5ec4de9dc0 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Wed, 13 Jul 2022 12:57:50 -0400 Subject: drm/amdgpu: Get rid of amdgpu_job->external_hw_fence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a follow-up cleanup to [1]. See below for the refcount balancing when calling amdgpu_job_submit_direct after this cleanup, as far as I calculated. 
amdgpu_fence_emit dma_fence_init 1 dma_fence_get(fence) 2 rcu_assign_pointer(*ptr, dma_fence_get(fence)) 3 ---> amdgpu_job_submit_direct completes before fence signaled amdgpu_sa_bo_free (*sa_bo)->fence = dma_fence_get(fence) 4 amdgpu_job_free dma_fence_put 3 amdgpu_vcn_enc_get_destroy_msg *fence = dma_fence_get(f) 4 dma_fence_put(f); 3 amdgpu_vcn_enc_ring_test_ib dma_fence_put(fence) 2 amdgpu_fence_process dma_fence_put 1 amdgpu_sa_bo_remove_locked dma_fence_put 0 ---> amdgpu_job_submit_direct completes after fence signaled amdgpu_fence_process dma_fence_put 2 amdgpu_job_free dma_fence_put 1 amdgpu_vcn_enc_get_destroy_msg *fence = dma_fence_get(f) 2 dma_fence_put(f); 1 amdgpu_vcn_enc_ring_test_ib dma_fence_put(fence) 0 [1] - https://patchwork.kernel.org/project/dri-devel/cover/20220624180955.485440-1-andrey.grodzovsky@amd.com/ Signed-off-by: Andrey Grodzovsky Suggested-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 27 +++++---------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 1 - 3 files changed, 6 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index e1c9587f659b..041bd906449d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5230,8 +5230,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, * * job->base holds a reference to parent fence */ - if (job && (job->hw_fence.ops != NULL) && - dma_fence_is_signaled(&job->hw_fence)) { + if (job && dma_fence_is_signaled(&job->hw_fence)) { job_signaled = true; dev_info(adev->dev, "Guilty job already signaled, skipping HW reset"); goto skip_hw_reset; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 36c1be77bf8f..5071b96be982 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -133,16 +133,10 @@ void amdgpu_job_free_resources(struct amdgpu_job *job) { struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched); struct dma_fence *f; - struct dma_fence *hw_fence; unsigned i; - if (job->hw_fence.ops == NULL) - hw_fence = job->external_hw_fence; - else - hw_fence = &job->hw_fence; - /* use sched fence if available */ - f = job->base.s_fence ? &job->base.s_fence->finished : hw_fence; + f = job->base.s_fence ? &job->base.s_fence->finished : &job->hw_fence; for (i = 0; i < job->num_ibs; ++i) amdgpu_ib_free(ring->adev, &job->ibs[i], f); } @@ -156,11 +150,7 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job) amdgpu_sync_free(&job->sync); amdgpu_sync_free(&job->sched_sync); - /* only put the hw fence if has embedded fence */ - if (job->hw_fence.ops != NULL) - dma_fence_put(&job->hw_fence); - else - kfree(job); + dma_fence_put(&job->hw_fence); } void amdgpu_job_free(struct amdgpu_job *job) @@ -169,11 +159,7 @@ void amdgpu_job_free(struct amdgpu_job *job) amdgpu_sync_free(&job->sync); amdgpu_sync_free(&job->sched_sync); - /* only put the hw fence if has embedded fence */ - if (job->hw_fence.ops != NULL) - dma_fence_put(&job->hw_fence); - else - kfree(job); + dma_fence_put(&job->hw_fence); } int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, @@ -203,15 +189,12 @@ int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring, int r; job->base.sched = &ring->sched; - r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, NULL, fence); - /* record external_hw_fence for direct submit */ - job->external_hw_fence = dma_fence_get(*fence); + r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job, fence); + if (r) return r; amdgpu_job_free(job); - dma_fence_put(*fence); - return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index d599c0540b46..babc0af751c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h 
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -50,7 +50,6 @@ struct amdgpu_job { struct amdgpu_sync sync; struct amdgpu_sync sched_sync; struct dma_fence hw_fence; - struct dma_fence *external_hw_fence; uint32_t preamble_status; uint32_t preemption_status; bool vm_needs_flush; -- cgit v1.2.3 From 75510fac07cdde23c9217c8299b6cd64c689fb2b Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 4 Jul 2022 17:16:38 +0800 Subject: drm/amdgpu: add umc v8_10_0 ip headers Add umc v8_10_0 register offset and shift masks header files Signed-off-by: YiPeng Chai Reviewed-by: Alexander Deucher Reviewed-by: Hawking Zhang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- .../amd/include/asic_reg/umc/umc_8_10_0_offset.h | 33 ++++++++ .../amd/include/asic_reg/umc/umc_8_10_0_sh_mask.h | 94 ++++++++++++++++++++++ 2 files changed, 127 insertions(+) create mode 100644 drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_10_0_offset.h create mode 100644 drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_10_0_sh_mask.h diff --git a/drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_10_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_10_0_offset.h new file mode 100644 index 000000000000..b798cf5a2c39 --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_10_0_offset.h @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef _umc_8_10_0_OFFSET_HEADER +#define _umc_8_10_0_OFFSET_HEADER + +#define regUMCCH0_0_GeccErrCntSel 0x0328 +#define regUMCCH0_0_GeccErrCntSel_BASE_IDX 2 +#define regUMCCH0_0_GeccErrCnt 0x0329 +#define regUMCCH0_0_GeccErrCnt_BASE_IDX 2 +#define regMCA_UMC_UMC0_MCUMC_STATUST0 0x03c2 +#define regMCA_UMC_UMC0_MCUMC_STATUST0_BASE_IDX 2 +#define regMCA_UMC_UMC0_MCUMC_ADDRT0 0x03c4 +#define regMCA_UMC_UMC0_MCUMC_ADDRT0_BASE_IDX 2 + +#endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_10_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_10_0_sh_mask.h new file mode 100644 index 000000000000..bd99b431247f --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_10_0_sh_mask.h @@ -0,0 +1,94 @@ +/* + * Copyright (C) 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef _umc_8_10_0_SH_MASK_HEADER +#define _umc_8_10_0_SH_MASK_HEADER + +//UMCCH0_0_GeccErrCntSel +#define UMCCH0_0_GeccErrCntSel__GeccErrInt__SHIFT 0xc +#define UMCCH0_0_GeccErrCntSel__GeccErrCntEn__SHIFT 0xf +#define UMCCH0_0_GeccErrCntSel__PoisonCntEn__SHIFT 0x10 +#define UMCCH0_0_GeccErrCntSel__GeccErrInt_MASK 0x00003000L +#define UMCCH0_0_GeccErrCntSel__GeccErrCntEn_MASK 0x00008000L +#define UMCCH0_0_GeccErrCntSel__PoisonCntEn_MASK 0x00030000L +//UMCCH0_0_GeccErrCnt +#define UMCCH0_0_GeccErrCnt__GeccErrCnt__SHIFT 0x0 +#define UMCCH0_0_GeccErrCnt__GeccUnCorrErrCnt__SHIFT 0x10 +#define UMCCH0_0_GeccErrCnt__GeccErrCnt_MASK 0x0000FFFFL +#define UMCCH0_0_GeccErrCnt__GeccUnCorrErrCnt_MASK 0xFFFF0000L +//MCA_UMC_UMC0_MCUMC_STATUST0 +#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrorCode__SHIFT 0x0 +#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrorCodeExt__SHIFT 0x10 +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV22__SHIFT 0x16 +#define MCA_UMC_UMC0_MCUMC_STATUST0__AddrLsb__SHIFT 0x18 +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV30__SHIFT 0x1e +#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrCoreId__SHIFT 0x20 +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV38__SHIFT 0x26 +#define MCA_UMC_UMC0_MCUMC_STATUST0__Scrub__SHIFT 0x28 +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV41__SHIFT 0x29 +#define MCA_UMC_UMC0_MCUMC_STATUST0__Poison__SHIFT 0x2b +#define MCA_UMC_UMC0_MCUMC_STATUST0__Deferred__SHIFT 0x2c +#define MCA_UMC_UMC0_MCUMC_STATUST0__UECC__SHIFT 0x2d +#define MCA_UMC_UMC0_MCUMC_STATUST0__CECC__SHIFT 0x2e +#define 
MCA_UMC_UMC0_MCUMC_STATUST0__RESERV47__SHIFT 0x2f +#define MCA_UMC_UMC0_MCUMC_STATUST0__Transparent__SHIFT 0x34 +#define MCA_UMC_UMC0_MCUMC_STATUST0__SyndV__SHIFT 0x35 +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV54__SHIFT 0x36 +#define MCA_UMC_UMC0_MCUMC_STATUST0__TCC__SHIFT 0x37 +#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrCoreIdVal__SHIFT 0x38 +#define MCA_UMC_UMC0_MCUMC_STATUST0__PCC__SHIFT 0x39 +#define MCA_UMC_UMC0_MCUMC_STATUST0__AddrV__SHIFT 0x3a +#define MCA_UMC_UMC0_MCUMC_STATUST0__MiscV__SHIFT 0x3b +#define MCA_UMC_UMC0_MCUMC_STATUST0__En__SHIFT 0x3c +#define MCA_UMC_UMC0_MCUMC_STATUST0__UC__SHIFT 0x3d +#define MCA_UMC_UMC0_MCUMC_STATUST0__Overflow__SHIFT 0x3e +#define MCA_UMC_UMC0_MCUMC_STATUST0__Val__SHIFT 0x3f +#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrorCode_MASK 0x000000000000FFFFL +#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrorCodeExt_MASK 0x00000000003F0000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV22_MASK 0x0000000000C00000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__AddrLsb_MASK 0x000000003F000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV30_MASK 0x00000000C0000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrCoreId_MASK 0x0000003F00000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV38_MASK 0x000000C000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__Scrub_MASK 0x0000010000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV41_MASK 0x0000060000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__Poison_MASK 0x0000080000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__Deferred_MASK 0x0000100000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__UECC_MASK 0x0000200000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__CECC_MASK 0x0000400000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV47_MASK 0x000F800000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__Transparent_MASK 0x0010000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__SyndV_MASK 0x0020000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV54_MASK 0x0040000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__TCC_MASK 
0x0080000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrCoreIdVal_MASK 0x0100000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__PCC_MASK 0x0200000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__AddrV_MASK 0x0400000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__MiscV_MASK 0x0800000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__En_MASK 0x1000000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__UC_MASK 0x2000000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__Overflow_MASK 0x4000000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__Val_MASK 0x8000000000000000L +//MCA_UMC_UMC0_MCUMC_ADDRT0 +#define MCA_UMC_UMC0_MCUMC_ADDRT0__ErrorAddr__SHIFT 0x0 +#define MCA_UMC_UMC0_MCUMC_ADDRT0__Reserved__SHIFT 0x38 +#define MCA_UMC_UMC0_MCUMC_ADDRT0__ErrorAddr_MASK 0x00FFFFFFFFFFFFFFL + +#endif -- cgit v1.2.3 From e4b1edf48fa37cf4f5ca403e384731fe28d13691 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 4 Jul 2022 17:18:14 +0800 Subject: drm/amdgpu: add umc ras functions for umc v8_10_0 1. Support query umc ras error counter. 2. Support ras umc ue error address remapping. 
Signed-off-by: YiPeng Chai Reviewed-by: Alexander Deucher Reviewed-by: Hawking Zhang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 10 + drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 27 ++- drivers/gpu/drm/amd/amdgpu/umc_v8_10.c | 357 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/umc_v8_10.h | 70 +++++++ 5 files changed, 464 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/umc_v8_10.c create mode 100644 drivers/gpu/drm/amd/amdgpu/umc_v8_10.h diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index a87e42c2c8dc..c7d0cd15b5ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -93,7 +93,7 @@ amdgpu-y += \ # add UMC block amdgpu-y += \ - umc_v6_0.o umc_v6_1.o umc_v6_7.o umc_v8_7.o + umc_v6_0.o umc_v6_1.o umc_v6_7.o umc_v8_7.o umc_v8_10.o # add IH block amdgpu-y += \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 2ec6698aa1fe..3629d8f292ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -41,6 +41,12 @@ #define LOOP_UMC_CH_INST(ch_inst) for ((ch_inst) = 0; (ch_inst) < adev->umc.channel_inst_num; (ch_inst)++) #define LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) LOOP_UMC_INST((umc_inst)) LOOP_UMC_CH_INST((ch_inst)) +#define LOOP_UMC_NODE_INST(node_inst) \ + for ((node_inst) = 0; (node_inst) < adev->umc.node_inst_num; (node_inst)++) + +#define LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) \ + LOOP_UMC_NODE_INST((node_inst)) LOOP_UMC_INST_AND_CH((umc_inst), (ch_inst)) + struct amdgpu_umc_ras { struct amdgpu_ras_block_object ras_block; void (*err_cnt_init)(struct amdgpu_device *adev); @@ -62,6 +68,10 @@ struct amdgpu_umc { uint32_t channel_inst_num; /* number of umc instance with memory map register access */ uint32_t umc_inst_num; + + /*number of umc node 
instance with memory map register access*/ + uint32_t node_inst_num; + /* UMC regiser per channel offset */ uint32_t channel_offs; /* channel index table of interleaved memory */ diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index edbdc0b934ea..503e40a90319 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -25,7 +25,7 @@ #include "amdgpu.h" #include "amdgpu_atomfirmware.h" #include "gmc_v11_0.h" -#include "umc_v8_7.h" +#include "umc_v8_10.h" #include "athub/athub_3_0_0_sh_mask.h" #include "athub/athub_3_0_0_offset.h" #include "oss/osssys_6_0_0_offset.h" @@ -537,11 +537,36 @@ static void gmc_v11_0_set_umc_funcs(struct amdgpu_device *adev) { switch (adev->ip_versions[UMC_HWIP][0]) { case IP_VERSION(8, 10, 0): + adev->umc.channel_inst_num = UMC_V8_10_CHANNEL_INSTANCE_NUM; + adev->umc.umc_inst_num = UMC_V8_10_UMC_INSTANCE_NUM; + adev->umc.node_inst_num = adev->gmc.num_umc; + adev->umc.max_ras_err_cnt_per_query = UMC_V8_10_TOTAL_CHANNEL_NUM(adev); + adev->umc.channel_offs = UMC_V8_10_PER_CHANNEL_OFFSET; + adev->umc.channel_idx_tbl = &umc_v8_10_channel_idx_tbl[0][0][0]; + adev->umc.ras = &umc_v8_10_ras; + break; case IP_VERSION(8, 11, 0): break; default: break; } + + if (adev->umc.ras) { + amdgpu_ras_register_ras_block(adev, &adev->umc.ras->ras_block); + + strcpy(adev->umc.ras->ras_block.ras_comm.name, "umc"); + adev->umc.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__UMC; + adev->umc.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->umc.ras_if = &adev->umc.ras->ras_block.ras_comm; + + /* If don't define special ras_late_init function, use default ras_late_init */ + if (!adev->umc.ras->ras_block.ras_late_init) + adev->umc.ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init; + + /* If not define special ras_cb function, use default ras_cb */ + if (!adev->umc.ras->ras_block.ras_cb) + adev->umc.ras->ras_block.ras_cb = 
amdgpu_umc_process_ras_data_cb; + } } diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c new file mode 100644 index 000000000000..36a2053f2e8b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c @@ -0,0 +1,357 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include "umc_v8_10.h" +#include "amdgpu_ras.h" +#include "amdgpu_umc.h" +#include "amdgpu.h" +#include "umc/umc_8_10_0_offset.h" +#include "umc/umc_8_10_0_sh_mask.h" + +#define UMC_8_NODE_DIST 0x800000 +#define UMC_8_INST_DIST 0x4000 + +struct channelnum_map_colbit { + uint32_t channel_num; + uint32_t col_bit; +}; + +const struct channelnum_map_colbit umc_v8_10_channelnum_map_colbit_table[] = { + {24, 13}, + {20, 13}, + {16, 12}, + {14, 12}, + {12, 12}, + {10, 12}, + {6, 11}, +}; + +const uint32_t + umc_v8_10_channel_idx_tbl[] + [UMC_V8_10_UMC_INSTANCE_NUM] + [UMC_V8_10_CHANNEL_INSTANCE_NUM] = { + {{16, 18}, {17, 19}}, + {{15, 11}, {3, 7}}, + {{1, 5}, {13, 9}}, + {{23, 21}, {22, 20}}, + {{0, 4}, {12, 8}}, + {{14, 10}, {2, 6}} + }; + +static inline uint32_t get_umc_v8_10_reg_offset(struct amdgpu_device *adev, + uint32_t node_inst, + uint32_t umc_inst, + uint32_t ch_inst) +{ + return adev->umc.channel_offs * ch_inst + UMC_8_INST_DIST * umc_inst + + UMC_8_NODE_DIST * node_inst; +} + +static void umc_v8_10_clear_error_count_per_channel(struct amdgpu_device *adev, + uint32_t umc_reg_offset) +{ + uint32_t ecc_err_cnt_addr; + + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt); + + /* clear error count */ + WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, + UMC_V8_10_CE_CNT_INIT); +} + +static void umc_v8_10_clear_error_count(struct amdgpu_device *adev) +{ + uint32_t node_inst = 0; + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + uint32_t umc_reg_offset = 0; + + LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) { + umc_reg_offset = get_umc_v8_10_reg_offset(adev, + node_inst, + umc_inst, + ch_inst); + + umc_v8_10_clear_error_count_per_channel(adev, + umc_reg_offset); + } +} + +static void umc_v8_10_query_correctable_error_count(struct amdgpu_device *adev, + uint32_t umc_reg_offset, + unsigned long *error_count) +{ + uint32_t ecc_err_cnt, ecc_err_cnt_addr; + uint64_t mc_umc_status; + uint32_t mc_umc_status_addr; + + /* UMC 
8_10 registers */ + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt); + mc_umc_status_addr = + SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); + + ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4); + *error_count += + (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_GeccErrCnt, GeccErrCnt) - + UMC_V8_10_CE_CNT_INIT); + + /* Check for SRAM correctable error, MCUMC_STATUS is a 64 bit register */ + mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) + *error_count += 1; +} + +static void umc_v8_10_query_uncorrectable_error_count(struct amdgpu_device *adev, + uint32_t umc_reg_offset, + unsigned long *error_count) +{ + uint64_t mc_umc_status; + uint32_t mc_umc_status_addr; + + mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); + + /* Check the MCUMC_STATUS. */ + mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); + if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) + *error_count += 1; +} + +static void umc_v8_10_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + + uint32_t node_inst = 0; + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + uint32_t umc_reg_offset = 0; + + LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) { + umc_reg_offset = get_umc_v8_10_reg_offset(adev, + node_inst, + umc_inst, + ch_inst); + + 
umc_v8_10_query_correctable_error_count(adev, + umc_reg_offset, + &(err_data->ce_count)); + umc_v8_10_query_uncorrectable_error_count(adev, + umc_reg_offset, + &(err_data->ue_count)); + } + + umc_v8_10_clear_error_count(adev); +} + +static uint32_t umc_v8_10_get_col_bit(uint32_t channel_num) +{ + uint32_t t = 0; + + for (t = 0; t < ARRAY_SIZE(umc_v8_10_channelnum_map_colbit_table); t++) + if (channel_num == umc_v8_10_channelnum_map_colbit_table[t].channel_num) + return umc_v8_10_channelnum_map_colbit_table[t].col_bit; + + /* Failed to get col_bit. */ + return U32_MAX; +} + +/* + * Mapping normal address to soc physical address in swizzle mode. + */ +static int umc_v8_10_swizzle_mode_na_to_pa(struct amdgpu_device *adev, + uint32_t channel_idx, + uint64_t na, uint64_t *soc_pa) +{ + uint32_t channel_num = UMC_V8_10_TOTAL_CHANNEL_NUM(adev); + uint32_t col_bit = umc_v8_10_get_col_bit(channel_num); + uint64_t tmp_addr; + + if (col_bit == U32_MAX) + return -1; + + tmp_addr = SWIZZLE_MODE_TMP_ADDR(na, channel_num, channel_idx); + *soc_pa = SWIZZLE_MODE_ADDR_HI(tmp_addr, col_bit) | + SWIZZLE_MODE_ADDR_MID(na, col_bit) | + SWIZZLE_MODE_ADDR_LOW(tmp_addr, col_bit) | + SWIZZLE_MODE_ADDR_LSB(na); + + return 0; +} + +static void umc_v8_10_query_error_address(struct amdgpu_device *adev, + struct ras_err_data *err_data, + uint32_t umc_reg_offset, + uint32_t node_inst, + uint32_t ch_inst, + uint32_t umc_inst) +{ + uint64_t mc_umc_status_addr; + uint64_t mc_umc_status, err_addr; + uint32_t channel_index; + + mc_umc_status_addr = + SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); + mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); + + if (mc_umc_status == 0) + return; + + if (!err_data->err_addr) { + /* clear umc status */ + WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL); + return; + } + + channel_index = + adev->umc.channel_idx_tbl[node_inst * adev->umc.umc_inst_num * + adev->umc.channel_inst_num + + umc_inst * 
adev->umc.channel_inst_num + + ch_inst]; + + /* calculate error address if ue/ce error is detected */ + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrV) == 1 && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) { + uint32_t addr_lsb; + uint64_t mc_umc_addrt0; + + mc_umc_addrt0 = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0); + err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4); + err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); + + /* the lowest lsb bits should be ignored */ + addr_lsb = REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrLsb); + + err_addr &= ~((0x1ULL << addr_lsb) - 1); + + /* we only save ue error information currently, ce is skipped */ + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) { + uint64_t na_err_addr_base = err_addr & ~(0x3ULL << UMC_V8_10_NA_C5_BIT); + uint64_t na_err_addr, retired_page_addr; + uint32_t col = 0; + int ret = 0; + + /* loop for all possibilities of [C6 C5] in normal address. */ + for (col = 0; col < UMC_V8_10_NA_COL_2BITS_POWER_OF_2_NUM; col++) { + na_err_addr = na_err_addr_base | (col << UMC_V8_10_NA_C5_BIT); + + /* Mapping normal error address to retired soc physical address. 
*/ + ret = umc_v8_10_swizzle_mode_na_to_pa(adev, channel_index, + na_err_addr, &retired_page_addr); + if (ret) { + dev_err(adev->dev, "Failed to map pa from umc na.\n"); + break; + } + dev_info(adev->dev, "Error Address(PA): 0x%llx\n", + retired_page_addr); + amdgpu_umc_fill_error_record(err_data, na_err_addr, + retired_page_addr, channel_index, umc_inst); + } + } + } + + /* clear umc status */ + WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL); +} + +static void umc_v8_10_query_ras_error_address(struct amdgpu_device *adev, + void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + uint32_t node_inst = 0; + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + uint32_t umc_reg_offset = 0; + + LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) { + umc_reg_offset = get_umc_v8_10_reg_offset(adev, + node_inst, + umc_inst, + ch_inst); + + umc_v8_10_query_error_address(adev, + err_data, + umc_reg_offset, + node_inst, + ch_inst, + umc_inst); + } +} + +static void umc_v8_10_err_cnt_init_per_channel(struct amdgpu_device *adev, + uint32_t umc_reg_offset) +{ + uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr; + uint32_t ecc_err_cnt_addr; + + ecc_err_cnt_sel_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCntSel); + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt); + + ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4); + + /* set ce error interrupt type to APIC based interrupt */ + ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_GeccErrCntSel, + GeccErrInt, 0x1); + WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel); + /* set error count to initial value */ + WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V8_10_CE_CNT_INIT); +} + +static void umc_v8_10_err_cnt_init(struct amdgpu_device *adev) +{ + uint32_t node_inst = 0; + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + uint32_t umc_reg_offset = 0; + + 
LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) { + umc_reg_offset = get_umc_v8_10_reg_offset(adev, + node_inst, + umc_inst, + ch_inst); + + umc_v8_10_err_cnt_init_per_channel(adev, umc_reg_offset); + } +} + +const struct amdgpu_ras_block_hw_ops umc_v8_10_ras_hw_ops = { + .query_ras_error_count = umc_v8_10_query_ras_error_count, + .query_ras_error_address = umc_v8_10_query_ras_error_address, +}; + +struct amdgpu_umc_ras umc_v8_10_ras = { + .ras_block = { + .hw_ops = &umc_v8_10_ras_hw_ops, + }, + .err_cnt_init = umc_v8_10_err_cnt_init, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h new file mode 100644 index 000000000000..849ede88e111 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h @@ -0,0 +1,70 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#ifndef __UMC_V8_10_H__ +#define __UMC_V8_10_H__ + +#include "soc15_common.h" +#include "amdgpu.h" + +/* number of umc channel instance with memory map register access */ +#define UMC_V8_10_CHANNEL_INSTANCE_NUM 2 +/* number of umc instance with memory map register access */ +#define UMC_V8_10_UMC_INSTANCE_NUM 2 + +/* Total channel instances for all umc nodes */ +#define UMC_V8_10_TOTAL_CHANNEL_NUM(adev) \ + (UMC_V8_10_CHANNEL_INSTANCE_NUM * UMC_V8_10_UMC_INSTANCE_NUM * (adev)->umc.node_inst_num) + +/* UMC register per channel offset */ +#define UMC_V8_10_PER_CHANNEL_OFFSET 0x400 + +/* EccErrCnt max value */ +#define UMC_V8_10_CE_CNT_MAX 0xffff +/* umc ce interrupt threshold */ +#define UUMC_V8_10_CE_INT_THRESHOLD 0xffff +/* umc ce count initial value */ +#define UMC_V8_10_CE_CNT_INIT (UMC_V8_10_CE_CNT_MAX - UUMC_V8_10_CE_INT_THRESHOLD) + +#define UMC_V8_10_NA_COL_2BITS_POWER_OF_2_NUM 4 + +/* The C5 bit in NA address */ +#define UMC_V8_10_NA_C5_BIT 14 + +/* Map to swizzle mode address */ +#define SWIZZLE_MODE_TMP_ADDR(na, ch_num, ch_idx) \ + ((((na) >> 10) * (ch_num) + (ch_idx)) << 10) +#define SWIZZLE_MODE_ADDR_HI(addr, col_bit) \ + (((addr) >> ((col_bit) + 2)) << ((col_bit) + 2)) +#define SWIZZLE_MODE_ADDR_MID(na, col_bit) ((((na) >> 8) & 0x3) << (col_bit)) +#define SWIZZLE_MODE_ADDR_LOW(addr, col_bit) \ + ((((addr) >> 10) & ((0x1ULL << (col_bit - 8)) - 1)) << 8) +#define SWIZZLE_MODE_ADDR_LSB(na) ((na) & 0xFF) + +extern struct amdgpu_umc_ras umc_v8_10_ras; +extern const uint32_t + umc_v8_10_channel_idx_tbl[] + [UMC_V8_10_UMC_INSTANCE_NUM] + [UMC_V8_10_CHANNEL_INSTANCE_NUM]; + +#endif + -- cgit v1.2.3 From 604d3a3f0dca0ef0ab0322f5e7af8c70c5ddb6d4 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Wed, 6 Jul 2022 15:13:55 +0530 Subject: drm/amdgpu: fix for coding style issues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixed below checkpatch warnings and errors drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c:131: 
CHECK: Comparison to NULL could be written "apd" drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c:150: CHECK: Comparison to NULL could be written "apd" drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c:196: CHECK: Prefer kernel type 'u64' over 'uint64_t' drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c:224: CHECK: Please don't use multiple blank lines drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c:226: CHECK: Comparison to NULL could be written "!adev->acp.acp_genpd" drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c:233: CHECK: Please don't use multiple blank lines drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c:239: CHECK: Alignment should match open parenthesis drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c:241: CHECK: Comparison to NULL could be written "!adev->acp.acp_cell" drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c:247: CHECK: Comparison to NULL could be written "!adev->acp.acp_res" drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c:253: CHECK: Comparison to NULL could be written "!i2s_pdata" drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c:350: CHECK: Alignment should match open parenthesis drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c:550: ERROR: that open brace { should be on the previous line Signed-off-by: Vijendar Mukunda Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c | 35 +++++++++++---------------------- 1 file changed, 12 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index cc9c9f8b23b2..ceda19152c77 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -128,16 +128,14 @@ static int acp_poweroff(struct generic_pm_domain *genpd) struct amdgpu_device *adev; apd = container_of(genpd, struct acp_pm_domain, gpd); - if (apd != NULL) { - adev = apd->adev; + adev = apd->adev; /* call smu to POWER GATE ACP block * smu will * 1. turn off the acp clock * 2. power off the acp tiles * 3. 
check and enter ulv state */ - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true); - } + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true); return 0; } @@ -147,16 +145,14 @@ static int acp_poweron(struct generic_pm_domain *genpd) struct amdgpu_device *adev; apd = container_of(genpd, struct acp_pm_domain, gpd); - if (apd != NULL) { - adev = apd->adev; + adev = apd->adev; /* call smu to UNGATE ACP block * smu will * 1. exit ulv * 2. turn on acp clock * 3. power on acp tiles */ - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false); - } + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false); return 0; } @@ -193,7 +189,7 @@ static int acp_genpd_remove_device(struct device *dev, void *data) static int acp_hw_init(void *handle) { int r; - uint64_t acp_base; + u64 acp_base; u32 val = 0; u32 count = 0; struct i2s_platform_data *i2s_pdata = NULL; @@ -220,37 +216,32 @@ static int acp_hw_init(void *handle) return -EINVAL; acp_base = adev->rmmio_base; - - adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL); - if (adev->acp.acp_genpd == NULL) + if (!adev->acp.acp_genpd) return -ENOMEM; adev->acp.acp_genpd->gpd.name = "ACP_AUDIO"; adev->acp.acp_genpd->gpd.power_off = acp_poweroff; adev->acp.acp_genpd->gpd.power_on = acp_poweron; - - adev->acp.acp_genpd->adev = adev; pm_genpd_init(&adev->acp.acp_genpd->gpd, NULL, false); - adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell), - GFP_KERNEL); + adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell), GFP_KERNEL); - if (adev->acp.acp_cell == NULL) { + if (!adev->acp.acp_cell) { r = -ENOMEM; goto failure; } adev->acp.acp_res = kcalloc(5, sizeof(struct resource), GFP_KERNEL); - if (adev->acp.acp_res == NULL) { + if (!adev->acp.acp_res) { r = -ENOMEM; goto failure; } i2s_pdata = kcalloc(3, sizeof(struct i2s_platform_data), GFP_KERNEL); - if (i2s_pdata == NULL) { + if (!i2s_pdata) { r = -ENOMEM; goto failure; } @@ -346,8 +337,7 @@ 
static int acp_hw_init(void *handle) adev->acp.acp_cell[3].platform_data = &i2s_pdata[2]; adev->acp.acp_cell[3].pdata_size = sizeof(struct i2s_platform_data); - r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell, - ACP_DEVS); + r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell, ACP_DEVS); if (r) goto failure; @@ -546,8 +536,7 @@ static const struct amd_ip_funcs acp_ip_funcs = { .set_powergating_state = acp_set_powergating_state, }; -const struct amdgpu_ip_block_version acp_ip_block = -{ +const struct amdgpu_ip_block_version acp_ip_block = { .type = AMD_IP_BLOCK_TYPE_ACP, .major = 2, .minor = 2, -- cgit v1.2.3 From 748262eb400e809aa13e3485f4983c3db3d0ebb3 Mon Sep 17 00:00:00 2001 From: lin cao Date: Wed, 13 Jul 2022 18:20:58 +0800 Subject: drm/amdgpu: Call trace info was found in dmesg when loading amdgpu In the case of SRIOV, the register smnMp1_PMI_3_FIFO will get an invalid value which will cause the "shift out of bound". In Ubuntu22.04, this issue will be checked and a related call trace will be reported in dmesg. 
Signed-off-by: lin cao Reviewed-by: Jingwen Chen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index b71860e5324a..fa520d79ef67 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -886,6 +886,7 @@ static void sienna_cichlid_stb_init(struct smu_context *smu); static int sienna_cichlid_init_smc_tables(struct smu_context *smu) { + struct amdgpu_device *adev = smu->adev; int ret = 0; ret = sienna_cichlid_tables_init(smu); @@ -896,7 +897,8 @@ static int sienna_cichlid_init_smc_tables(struct smu_context *smu) if (ret) return ret; - sienna_cichlid_stb_init(smu); + if (!amdgpu_sriov_vf(adev)) + sienna_cichlid_stb_init(smu); return smu_v11_0_init_smc_tables(smu); } -- cgit v1.2.3 From 49062ee374f7a47443dac7e255bd419861de5b43 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 30 Jun 2022 07:56:50 +0530 Subject: drm/amdgpu: add dmi check for jadeite platform DMI check is required to distinguish Jadeite platform from Stoney base variant. Add DMI check logic for Jadeite platform. 
Signed-off-by: Vijendar Mukunda Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c | 36 +++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index ceda19152c77..4c265ad198b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -29,6 +29,8 @@ #include #include #include +#include +#include #include "amdgpu.h" #include "atom.h" @@ -36,6 +38,7 @@ #include "acp_gfx_if.h" +#define ST_JADEITE 1 #define ACP_TILE_ON_MASK 0x03 #define ACP_TILE_OFF_MASK 0x02 #define ACP_TILE_ON_RETAIN_REG_MASK 0x1f @@ -85,6 +88,8 @@ #define ACP_DEVS 4 #define ACP_SRC_ID 162 +static unsigned long acp_machine_id; + enum { ACP_TILE_P1 = 0, ACP_TILE_P2, @@ -180,6 +185,37 @@ static int acp_genpd_remove_device(struct device *dev, void *data) return 0; } +static int acp_quirk_cb(const struct dmi_system_id *id) +{ + acp_machine_id = ST_JADEITE; + return 1; +} + +static const struct dmi_system_id acp_quirk_table[] = { + { + .callback = acp_quirk_cb, + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "AMD"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Jadeite"), + } + }, + { + .callback = acp_quirk_cb, + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "IP3 Technology CO.,Ltd."), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ASN1D"), + }, + }, + { + .callback = acp_quirk_cb, + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Standard"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ASN10"), + }, + }, + {} +}; + /** * acp_hw_init - start and test ACP block * -- cgit v1.2.3 From 4c33e5179ff1a09cdfba38fa4fd260ae6735d3bb Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 30 Jun 2022 07:57:14 +0530 Subject: drm/amdgpu: create I2S platform devices for Jadeite platform Jadeite platform uses I2S MICSP instance. Create platform devices for DMA controller and I2S controller for Jadeite platform. 
Signed-off-by: Vijendar Mukunda Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c | 293 ++++++++++++++++++++------------ 1 file changed, 184 insertions(+), 109 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index 4c265ad198b8..bcc7ee02e0fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -262,125 +262,200 @@ static int acp_hw_init(void *handle) adev->acp.acp_genpd->adev = adev; pm_genpd_init(&adev->acp.acp_genpd->gpd, NULL, false); + dmi_check_system(acp_quirk_table); + switch (acp_machine_id) { + case ST_JADEITE: + { + adev->acp.acp_cell = kcalloc(2, sizeof(struct mfd_cell), + GFP_KERNEL); + if (!adev->acp.acp_cell) { + r = -ENOMEM; + goto failure; + } - adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell), GFP_KERNEL); - - if (!adev->acp.acp_cell) { - r = -ENOMEM; - goto failure; - } - - adev->acp.acp_res = kcalloc(5, sizeof(struct resource), GFP_KERNEL); - if (!adev->acp.acp_res) { - r = -ENOMEM; - goto failure; - } + adev->acp.acp_res = kcalloc(3, sizeof(struct resource), GFP_KERNEL); + if (!adev->acp.acp_res) { + r = -ENOMEM; + goto failure; + } - i2s_pdata = kcalloc(3, sizeof(struct i2s_platform_data), GFP_KERNEL); - if (!i2s_pdata) { - r = -ENOMEM; - goto failure; - } + i2s_pdata = kcalloc(1, sizeof(struct i2s_platform_data), GFP_KERNEL); + if (!i2s_pdata) { + r = -ENOMEM; + goto failure; + } - switch (adev->asic_type) { - case CHIP_STONEY: i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | - DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; + DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; + i2s_pdata[0].cap = DWC_I2S_PLAY | DWC_I2S_RECORD; + i2s_pdata[0].snd_rates = SNDRV_PCM_RATE_8000_96000; + i2s_pdata[0].i2s_reg_comp1 = ACP_I2S_COMP1_CAP_REG_OFFSET; + i2s_pdata[0].i2s_reg_comp2 = ACP_I2S_COMP2_CAP_REG_OFFSET; + + adev->acp.acp_res[0].name = "acp2x_dma"; + adev->acp.acp_res[0].flags = IORESOURCE_MEM; + 
adev->acp.acp_res[0].start = acp_base; + adev->acp.acp_res[0].end = acp_base + ACP_DMA_REGS_END; + + adev->acp.acp_res[1].name = "acp2x_dw_i2s_play_cap"; + adev->acp.acp_res[1].flags = IORESOURCE_MEM; + adev->acp.acp_res[1].start = acp_base + ACP_I2S_CAP_REGS_START; + adev->acp.acp_res[1].end = acp_base + ACP_I2S_CAP_REGS_END; + + adev->acp.acp_res[2].name = "acp2x_dma_irq"; + adev->acp.acp_res[2].flags = IORESOURCE_IRQ; + adev->acp.acp_res[2].start = amdgpu_irq_create_mapping(adev, 162); + adev->acp.acp_res[2].end = adev->acp.acp_res[2].start; + + adev->acp.acp_cell[0].name = "acp_audio_dma"; + adev->acp.acp_cell[0].num_resources = 3; + adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0]; + adev->acp.acp_cell[0].platform_data = &adev->asic_type; + adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type); + + adev->acp.acp_cell[1].name = "designware-i2s"; + adev->acp.acp_cell[1].num_resources = 1; + adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1]; + adev->acp.acp_cell[1].platform_data = &i2s_pdata[0]; + adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data); + r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell, 2); + if (r) + goto failure; + r = device_for_each_child(adev->acp.parent, &adev->acp.acp_genpd->gpd, + acp_genpd_add_device); + if (r) + goto failure; break; - default: - i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET; } - i2s_pdata[0].cap = DWC_I2S_PLAY; - i2s_pdata[0].snd_rates = SNDRV_PCM_RATE_8000_96000; - i2s_pdata[0].i2s_reg_comp1 = ACP_I2S_COMP1_PLAY_REG_OFFSET; - i2s_pdata[0].i2s_reg_comp2 = ACP_I2S_COMP2_PLAY_REG_OFFSET; - switch (adev->asic_type) { - case CHIP_STONEY: - i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | - DW_I2S_QUIRK_COMP_PARAM1 | - DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; - break; default: - i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | - DW_I2S_QUIRK_COMP_PARAM1; - } + adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell), + GFP_KERNEL); - i2s_pdata[1].cap = DWC_I2S_RECORD; 
- i2s_pdata[1].snd_rates = SNDRV_PCM_RATE_8000_96000; - i2s_pdata[1].i2s_reg_comp1 = ACP_I2S_COMP1_CAP_REG_OFFSET; - i2s_pdata[1].i2s_reg_comp2 = ACP_I2S_COMP2_CAP_REG_OFFSET; + if (!adev->acp.acp_cell) { + r = -ENOMEM; + goto failure; + } - i2s_pdata[2].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET; - switch (adev->asic_type) { - case CHIP_STONEY: - i2s_pdata[2].quirks |= DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; - break; - default: - break; - } + adev->acp.acp_res = kcalloc(5, sizeof(struct resource), GFP_KERNEL); + if (!adev->acp.acp_res) { + r = -ENOMEM; + goto failure; + } + + i2s_pdata = kcalloc(3, sizeof(struct i2s_platform_data), GFP_KERNEL); + if (!i2s_pdata) { + r = -ENOMEM; + goto failure; + } + + switch (adev->asic_type) { + case CHIP_STONEY: + i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | + DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; + break; + default: + i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET; + } + i2s_pdata[0].cap = DWC_I2S_PLAY; + i2s_pdata[0].snd_rates = SNDRV_PCM_RATE_8000_96000; + i2s_pdata[0].i2s_reg_comp1 = ACP_I2S_COMP1_PLAY_REG_OFFSET; + i2s_pdata[0].i2s_reg_comp2 = ACP_I2S_COMP2_PLAY_REG_OFFSET; + switch (adev->asic_type) { + case CHIP_STONEY: + i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | + DW_I2S_QUIRK_COMP_PARAM1 | + DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; + break; + default: + i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | + DW_I2S_QUIRK_COMP_PARAM1; + } + + i2s_pdata[1].cap = DWC_I2S_RECORD; + i2s_pdata[1].snd_rates = SNDRV_PCM_RATE_8000_96000; + i2s_pdata[1].i2s_reg_comp1 = ACP_I2S_COMP1_CAP_REG_OFFSET; + i2s_pdata[1].i2s_reg_comp2 = ACP_I2S_COMP2_CAP_REG_OFFSET; - i2s_pdata[2].cap = DWC_I2S_PLAY | DWC_I2S_RECORD; - i2s_pdata[2].snd_rates = SNDRV_PCM_RATE_8000_96000; - i2s_pdata[2].i2s_reg_comp1 = ACP_BT_COMP1_REG_OFFSET; - i2s_pdata[2].i2s_reg_comp2 = ACP_BT_COMP2_REG_OFFSET; - - adev->acp.acp_res[0].name = "acp2x_dma"; - adev->acp.acp_res[0].flags = IORESOURCE_MEM; - adev->acp.acp_res[0].start = acp_base; - adev->acp.acp_res[0].end = 
acp_base + ACP_DMA_REGS_END; - - adev->acp.acp_res[1].name = "acp2x_dw_i2s_play"; - adev->acp.acp_res[1].flags = IORESOURCE_MEM; - adev->acp.acp_res[1].start = acp_base + ACP_I2S_PLAY_REGS_START; - adev->acp.acp_res[1].end = acp_base + ACP_I2S_PLAY_REGS_END; - - adev->acp.acp_res[2].name = "acp2x_dw_i2s_cap"; - adev->acp.acp_res[2].flags = IORESOURCE_MEM; - adev->acp.acp_res[2].start = acp_base + ACP_I2S_CAP_REGS_START; - adev->acp.acp_res[2].end = acp_base + ACP_I2S_CAP_REGS_END; - - adev->acp.acp_res[3].name = "acp2x_dw_bt_i2s_play_cap"; - adev->acp.acp_res[3].flags = IORESOURCE_MEM; - adev->acp.acp_res[3].start = acp_base + ACP_BT_PLAY_REGS_START; - adev->acp.acp_res[3].end = acp_base + ACP_BT_PLAY_REGS_END; - - adev->acp.acp_res[4].name = "acp2x_dma_irq"; - adev->acp.acp_res[4].flags = IORESOURCE_IRQ; - adev->acp.acp_res[4].start = amdgpu_irq_create_mapping(adev, 162); - adev->acp.acp_res[4].end = adev->acp.acp_res[4].start; - - adev->acp.acp_cell[0].name = "acp_audio_dma"; - adev->acp.acp_cell[0].num_resources = 5; - adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0]; - adev->acp.acp_cell[0].platform_data = &adev->asic_type; - adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type); - - adev->acp.acp_cell[1].name = "designware-i2s"; - adev->acp.acp_cell[1].num_resources = 1; - adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1]; - adev->acp.acp_cell[1].platform_data = &i2s_pdata[0]; - adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data); - - adev->acp.acp_cell[2].name = "designware-i2s"; - adev->acp.acp_cell[2].num_resources = 1; - adev->acp.acp_cell[2].resources = &adev->acp.acp_res[2]; - adev->acp.acp_cell[2].platform_data = &i2s_pdata[1]; - adev->acp.acp_cell[2].pdata_size = sizeof(struct i2s_platform_data); - - adev->acp.acp_cell[3].name = "designware-i2s"; - adev->acp.acp_cell[3].num_resources = 1; - adev->acp.acp_cell[3].resources = &adev->acp.acp_res[3]; - adev->acp.acp_cell[3].platform_data = &i2s_pdata[2]; - 
adev->acp.acp_cell[3].pdata_size = sizeof(struct i2s_platform_data); - - r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell, ACP_DEVS); - if (r) - goto failure; - - r = device_for_each_child(adev->acp.parent, &adev->acp.acp_genpd->gpd, - acp_genpd_add_device); - if (r) - goto failure; + i2s_pdata[2].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET; + switch (adev->asic_type) { + case CHIP_STONEY: + i2s_pdata[2].quirks |= DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; + break; + default: + break; + } + + i2s_pdata[2].cap = DWC_I2S_PLAY | DWC_I2S_RECORD; + i2s_pdata[2].snd_rates = SNDRV_PCM_RATE_8000_96000; + i2s_pdata[2].i2s_reg_comp1 = ACP_BT_COMP1_REG_OFFSET; + i2s_pdata[2].i2s_reg_comp2 = ACP_BT_COMP2_REG_OFFSET; + + i2s_pdata[3].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET; + switch (adev->asic_type) { + case CHIP_STONEY: + i2s_pdata[3].quirks |= DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; + break; + default: + break; + } + adev->acp.acp_res[0].name = "acp2x_dma"; + adev->acp.acp_res[0].flags = IORESOURCE_MEM; + adev->acp.acp_res[0].start = acp_base; + adev->acp.acp_res[0].end = acp_base + ACP_DMA_REGS_END; + + adev->acp.acp_res[1].name = "acp2x_dw_i2s_play"; + adev->acp.acp_res[1].flags = IORESOURCE_MEM; + adev->acp.acp_res[1].start = acp_base + ACP_I2S_PLAY_REGS_START; + adev->acp.acp_res[1].end = acp_base + ACP_I2S_PLAY_REGS_END; + + adev->acp.acp_res[2].name = "acp2x_dw_i2s_cap"; + adev->acp.acp_res[2].flags = IORESOURCE_MEM; + adev->acp.acp_res[2].start = acp_base + ACP_I2S_CAP_REGS_START; + adev->acp.acp_res[2].end = acp_base + ACP_I2S_CAP_REGS_END; + + adev->acp.acp_res[3].name = "acp2x_dw_bt_i2s_play_cap"; + adev->acp.acp_res[3].flags = IORESOURCE_MEM; + adev->acp.acp_res[3].start = acp_base + ACP_BT_PLAY_REGS_START; + adev->acp.acp_res[3].end = acp_base + ACP_BT_PLAY_REGS_END; + + adev->acp.acp_res[4].name = "acp2x_dma_irq"; + adev->acp.acp_res[4].flags = IORESOURCE_IRQ; + adev->acp.acp_res[4].start = amdgpu_irq_create_mapping(adev, 162); + adev->acp.acp_res[4].end = 
adev->acp.acp_res[4].start; + + adev->acp.acp_cell[0].name = "acp_audio_dma"; + adev->acp.acp_cell[0].num_resources = 5; + adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0]; + adev->acp.acp_cell[0].platform_data = &adev->asic_type; + adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type); + + adev->acp.acp_cell[1].name = "designware-i2s"; + adev->acp.acp_cell[1].num_resources = 1; + adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1]; + adev->acp.acp_cell[1].platform_data = &i2s_pdata[0]; + adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data); + + adev->acp.acp_cell[2].name = "designware-i2s"; + adev->acp.acp_cell[2].num_resources = 1; + adev->acp.acp_cell[2].resources = &adev->acp.acp_res[2]; + adev->acp.acp_cell[2].platform_data = &i2s_pdata[1]; + adev->acp.acp_cell[2].pdata_size = sizeof(struct i2s_platform_data); + + adev->acp.acp_cell[3].name = "designware-i2s"; + adev->acp.acp_cell[3].num_resources = 1; + adev->acp.acp_cell[3].resources = &adev->acp.acp_res[3]; + adev->acp.acp_cell[3].platform_data = &i2s_pdata[2]; + adev->acp.acp_cell[3].pdata_size = sizeof(struct i2s_platform_data); + + r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell, ACP_DEVS); + if (r) + goto failure; + + r = device_for_each_child(adev->acp.parent, &adev->acp.acp_genpd->gpd, + acp_genpd_add_device); + if (r) + goto failure; + } /* Assert Soft reset of ACP */ val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET); -- cgit v1.2.3 From 912db6a58738e8be502838eb6a88f207ba356cd7 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 11 Jul 2022 21:59:06 -0400 Subject: drm/amdgpu: use the same HDP flush registers for all nbio 7.4.x Align aldebaran with all other asics. One HDP bit per SDMA instance, aligned with firmware. This is effectively a revert of commit a0f9f8546668 ("drm/amdgpu/nbio7.4: don't use GPU_HDP_FLUSH bit 12"). On further discussions with the relevant hardware teams, re-align the bits for SDMA. 
Fixes: a0f9f8546668 ("drm/amdgpu/nbio7.4: don't use GPU_HDP_FLUSH bit 12") Reviewed-by: Kent Russell Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 5 +---- drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 21 --------------------- drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h | 1 - 3 files changed, 1 insertion(+), 26 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 37234c2998d7..8fb7cf52c56d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -2206,12 +2206,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) break; case IP_VERSION(7, 4, 0): case IP_VERSION(7, 4, 1): - adev->nbio.funcs = &nbio_v7_4_funcs; - adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg; - break; case IP_VERSION(7, 4, 4): adev->nbio.funcs = &nbio_v7_4_funcs; - adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg_ald; + adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg; break; case IP_VERSION(7, 2, 0): case IP_VERSION(7, 2, 1): diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index 4531761dcf77..11848d1e238b 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -339,27 +339,6 @@ const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg = { .ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK, }; -const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg_ald = { - .ref_and_mask_cp0 = GPU_HDP_FLUSH_DONE__CP0_MASK, - .ref_and_mask_cp1 = GPU_HDP_FLUSH_DONE__CP1_MASK, - .ref_and_mask_cp2 = GPU_HDP_FLUSH_DONE__CP2_MASK, - .ref_and_mask_cp3 = GPU_HDP_FLUSH_DONE__CP3_MASK, - .ref_and_mask_cp4 = GPU_HDP_FLUSH_DONE__CP4_MASK, - .ref_and_mask_cp5 = GPU_HDP_FLUSH_DONE__CP5_MASK, - .ref_and_mask_cp6 = GPU_HDP_FLUSH_DONE__CP6_MASK, - .ref_and_mask_cp7 = GPU_HDP_FLUSH_DONE__CP7_MASK, - .ref_and_mask_cp8 = GPU_HDP_FLUSH_DONE__CP8_MASK, - .ref_and_mask_cp9 = 
GPU_HDP_FLUSH_DONE__CP9_MASK, - .ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK, - .ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK, - .ref_and_mask_sdma2 = GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK, - .ref_and_mask_sdma3 = GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK, - .ref_and_mask_sdma4 = GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK, - .ref_and_mask_sdma5 = GPU_HDP_FLUSH_DONE__RSVD_ENG6_MASK, - .ref_and_mask_sdma6 = GPU_HDP_FLUSH_DONE__RSVD_ENG7_MASK, - .ref_and_mask_sdma7 = GPU_HDP_FLUSH_DONE__RSVD_ENG8_MASK, -}; - static void nbio_v7_4_init_registers(struct amdgpu_device *adev) { uint32_t baco_cntl; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h index 7490022d79d4..f27c41728822 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h @@ -27,7 +27,6 @@ #include "soc15_common.h" extern const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg; -extern const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg_ald; extern const struct amdgpu_nbio_funcs nbio_v7_4_funcs; extern struct amdgpu_nbio_ras nbio_v7_4_ras; -- cgit v1.2.3 From 98a90f1f0fdd112b85b16ef6ceee69f319ab9311 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 11 Jul 2022 22:04:56 -0400 Subject: drm/amdgpu: use the same HDP flush registers for all nbio 2.3.x Align RDNA2.x with other asics. One HDP bit per SDMA instance, aligned with firmware. This is effectively a revert of commit 369b7d04baf3 ("drm/amdgpu/nbio2.3: don't use GPU_HDP_FLUSH bit 12"). On further discussions with the relevant hardware teams, re-align the bits for SDMA. 
Fixes: 369b7d04baf3 ("drm/amdgpu/nbio2.3: don't use GPU_HDP_FLUSH bit 12") Reviewed-by: Kent Russell Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 5 +---- drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c | 21 --------------------- drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h | 1 - 3 files changed, 1 insertion(+), 26 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 8fb7cf52c56d..0ba56e2ebf09 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -2222,15 +2222,12 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(2, 3, 0): case IP_VERSION(2, 3, 1): case IP_VERSION(2, 3, 2): - adev->nbio.funcs = &nbio_v2_3_funcs; - adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg; - break; case IP_VERSION(3, 3, 0): case IP_VERSION(3, 3, 1): case IP_VERSION(3, 3, 2): case IP_VERSION(3, 3, 3): adev->nbio.funcs = &nbio_v2_3_funcs; - adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg_sc; + adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg; break; case IP_VERSION(4, 3, 0): case IP_VERSION(4, 3, 1): diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index 34c610b9157d..b465baa26762 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -328,27 +328,6 @@ const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg = { .ref_and_mask_sdma1 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__SDMA1_MASK, }; -const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg_sc = { - .ref_and_mask_cp0 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP0_MASK, - .ref_and_mask_cp1 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP1_MASK, - .ref_and_mask_cp2 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP2_MASK, - .ref_and_mask_cp3 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP3_MASK, - .ref_and_mask_cp4 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP4_MASK, - .ref_and_mask_cp5 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP5_MASK, - 
.ref_and_mask_cp6 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP6_MASK, - .ref_and_mask_cp7 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP7_MASK, - .ref_and_mask_cp8 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP8_MASK, - .ref_and_mask_cp9 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP9_MASK, - .ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK, - .ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK, - .ref_and_mask_sdma2 = GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK, - .ref_and_mask_sdma3 = GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK, - .ref_and_mask_sdma4 = GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK, - .ref_and_mask_sdma5 = GPU_HDP_FLUSH_DONE__RSVD_ENG6_MASK, - .ref_and_mask_sdma6 = GPU_HDP_FLUSH_DONE__RSVD_ENG7_MASK, - .ref_and_mask_sdma7 = GPU_HDP_FLUSH_DONE__RSVD_ENG8_MASK, -}; - static void nbio_v2_3_init_registers(struct amdgpu_device *adev) { uint32_t def, data; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h index 6074dd3a1ed8..a43b60acf7f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h @@ -27,7 +27,6 @@ #include "soc15_common.h" extern const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg; -extern const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg_sc; extern const struct amdgpu_nbio_funcs nbio_v2_3_funcs; #endif -- cgit v1.2.3 From 958afce98c2c86732483458c03540d3c6ef45254 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 6 Jul 2022 11:10:15 -0400 Subject: drm/amdgpu: restore original stable pstate on ctx fini Save the original stable pstate on ctx init and restore it on ctx fini so that we restore a manually selected stable pstate on ctx exit. 
v2: fix init order (Alex) v3: don't add new variable to ctx struct (Evan) Fixes: c65b364c52ba ("drm/amdgpu/ctx: only reset stable pstate if the user changed it (v2)") Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 60 ++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 2ef5296216d6..8ee4e8491f39 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -272,32 +272,6 @@ static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity) return res; } -static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority, - struct drm_file *filp, struct amdgpu_ctx *ctx) -{ - int r; - - r = amdgpu_ctx_priority_permit(filp, priority); - if (r) - return r; - - memset(ctx, 0, sizeof(*ctx)); - - kref_init(&ctx->refcount); - ctx->mgr = mgr; - spin_lock_init(&ctx->ring_lock); - mutex_init(&ctx->lock); - - ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter); - ctx->reset_counter_query = ctx->reset_counter; - ctx->vram_lost_counter = atomic_read(&mgr->adev->vram_lost_counter); - ctx->init_priority = priority; - ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET; - ctx->stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE; - - return 0; -} - static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx, u32 *stable_pstate) { @@ -326,6 +300,38 @@ static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx, return 0; } +static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority, + struct drm_file *filp, struct amdgpu_ctx *ctx) +{ + u32 current_stable_pstate; + int r; + + r = amdgpu_ctx_priority_permit(filp, priority); + if (r) + return r; + + memset(ctx, 0, sizeof(*ctx)); + + kref_init(&ctx->refcount); + ctx->mgr = mgr; + spin_lock_init(&ctx->ring_lock); + mutex_init(&ctx->lock); + + ctx->reset_counter = 
atomic_read(&mgr->adev->gpu_reset_counter); + ctx->reset_counter_query = ctx->reset_counter; + ctx->vram_lost_counter = atomic_read(&mgr->adev->vram_lost_counter); + ctx->init_priority = priority; + ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET; + + r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate); + if (r) + return r; + + ctx->stable_pstate = current_stable_pstate; + + return 0; +} + static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx, u32 stable_pstate) { @@ -397,7 +403,7 @@ static void amdgpu_ctx_fini(struct kref *ref) } if (drm_dev_enter(&adev->ddev, &idx)) { - amdgpu_ctx_set_stable_pstate(ctx, AMDGPU_CTX_STABLE_PSTATE_NONE); + amdgpu_ctx_set_stable_pstate(ctx, ctx->stable_pstate); drm_dev_exit(idx); } -- cgit v1.2.3 From 3876a8b5e241081b2a519f848a65c00d8e6cd124 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 12 Jul 2022 15:42:47 -0700 Subject: drm/amd/display: Enable building new display engine with KCOV enabled The new display engine uses floating point math, which is not supported by KCOV. Commit 9d1d02ff3678 ("drm/amd/display: Don't build DCN1 when kcov is enabled") tried to work around the problem by disabling CONFIG_DRM_AMD_DC_DCN if KCOV_INSTRUMENT_ALL and KCOV_ENABLE_COMPARISONS are enabled. The result is that KCOV can not be enabled on systems which require this display engine. A much simpler and less invasive solution is to disable KCOV selectively when compiling the display engine while keeping it enabled for the rest of the kernel.
Fixes: 9d1d02ff3678 ("drm/amd/display: Don't build DCN1 when kcov is enabled") Cc: Arnd Bergmann Cc: Leo Li Reviewed-by: Harry Wentland Signed-off-by: Guenter Roeck Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/Kconfig | 2 +- drivers/gpu/drm/amd/display/dc/Makefile | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index b4029c0d5d8c..96cbc87f7b6b 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -6,7 +6,7 @@ config DRM_AMD_DC bool "AMD DC - Enable new display engine" default y select SND_HDA_COMPONENT if SND_HDA_CORE - select DRM_AMD_DC_DCN if (X86 || PPC64) && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS) + select DRM_AMD_DC_DCN if (X86 || PPC64) help Choose this option if you want to use the new display engine support for AMDGPU. This adds required support for Vega and diff --git a/drivers/gpu/drm/amd/display/dc/Makefile b/drivers/gpu/drm/amd/display/dc/Makefile index 273f8f2c8e02..b9effadfc4bb 100644 --- a/drivers/gpu/drm/amd/display/dc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/Makefile @@ -25,6 +25,9 @@ DC_LIBS = basics bios dml clk_mgr dce gpio irq link virtual ifdef CONFIG_DRM_AMD_DC_DCN + +KCOV_INSTRUMENT := n + DC_LIBS += dcn20 DC_LIBS += dsc DC_LIBS += dcn10 -- cgit v1.2.3 From 606ee059f85a9aabb0242cbc72852572a3c40432 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 14 Jul 2022 11:34:27 +0100 Subject: drm/amd/display: Fix spelling mistake "supporing" -> "supporting" There is a spelling mistake in a dml_print message. Fix it. 
Signed-off-by: Colin Ian King Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c index 6101c962ab0a..fc4d7474c111 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c @@ -2994,7 +2994,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman for (k = 0; k < v->NumberOfActivePlanes; ++k) { if (v->ImmediateFlipSupportedForPipe[k] == false) { #ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: Pipe %0d not supporing iflip\n", __func__, k); + dml_print("DML::%s: Pipe %0d not supporting iflip\n", __func__, k); #endif v->ImmediateFlipSupported = false; } -- cgit v1.2.3 From 133dc89c640f965c2736e70d8dec0699d763850d Mon Sep 17 00:00:00 2001 From: André Almeida Date: Thu, 14 Jul 2022 10:49:50 -0300 Subject: drm/amdgpu: Clarify asics naming in Kconfig options MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clarify which architecture those asics acronyms refers to. Signed-off-by: André Almeida Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Kconfig | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index 74a8105fd2c0..7777d55275de 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -4,7 +4,7 @@ config DRM_AMDGPU_SI depends on DRM_AMDGPU help Choose this option if you want to enable experimental support - for SI asics. + for SI (Southern Islands) asics. SI is already supported in radeon. 
Experimental support for SI in amdgpu will be disabled by default and is still provided by @@ -16,7 +16,8 @@ config DRM_AMDGPU_CIK bool "Enable amdgpu support for CIK parts" depends on DRM_AMDGPU help - Choose this option if you want to enable support for CIK asics. + Choose this option if you want to enable support for CIK (Sea + Islands) asics. CIK is already supported in radeon. Support for CIK in amdgpu will be disabled by default and is still provided by radeon. -- cgit v1.2.3 From 2d04559e0660cf503d4e46cfbd7421d0b6156aa1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 13 Jul 2022 14:33:37 +0300 Subject: drm/amd/display: Remove unnecessary NULL check in commit_planes_for_stream() Smatch complains that: drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc.c:3369 commit_planes_for_stream() warn: variable dereferenced before check 'stream' (see line 3114) The 'stream' pointer cannot be NULL and the check can be removed. Signed-off-by: Dan Carpenter Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 51c9563ad137..e42f44fc1c08 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -3478,7 +3478,7 @@ static void commit_planes_for_stream(struct dc *dc, top_pipe_to_program->stream_res.tg, CRTC_STATE_VACTIVE); - if (stream && should_use_dmub_lock(stream->link)) { + if (should_use_dmub_lock(stream->link)) { union dmub_hw_lock_flags hw_locks = { 0 }; struct dmub_hw_lock_inst_flags inst_flags = { 0 }; -- cgit v1.2.3 From 50fe04d46aab77cf8fc3d48fbc38d872d181a849 Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Thu, 14 Jul 2022 15:32:51 +0800 Subject: drm/amdgpu: introduce runtime pm mode It can benefit code consistency in future. 
Suggested-by: Lijo Lazar Signed-off-by: Guchun Chen Reviewed-by: Lijo Lazar Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 6 +++++- drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 9 +++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 6de63ea6687e..fbbcdbdbe65a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -161,10 +161,12 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags) if (amdgpu_device_supports_px(dev) && (amdgpu_runtime_pm != 0)) { /* enable runpm by default for atpx */ adev->runpm = true; + adev->pm.rpm_mode = AMDGPU_RUNPM_PX; dev_info(adev->dev, "Using ATPX for runtime pm\n"); } else if (amdgpu_device_supports_boco(dev) && (amdgpu_runtime_pm != 0)) { /* enable runpm by default for boco */ adev->runpm = true; + adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO; dev_info(adev->dev, "Using BOCO for runtime pm\n"); } else if (amdgpu_device_supports_baco(dev) && (amdgpu_runtime_pm != 0)) { @@ -188,8 +190,10 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags) amdgpu_runtime_pm_quirk(adev); - if (adev->runpm) + if (adev->runpm) { + adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; dev_info(adev->dev, "Using BACO for runtime pm\n"); + } } /* Call ACPI methods: require modeset init diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index 524fb09437e5..65624d091ed2 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -45,6 +45,13 @@ enum amdgpu_int_thermal_type { THERMAL_TYPE_KV, }; +enum amdgpu_runpm_mode { + AMDGPU_RUNPM_NONE, + AMDGPU_RUNPM_PX, + AMDGPU_RUNPM_BOCO, + AMDGPU_RUNPM_BACO, +}; + struct amdgpu_ps { u32 caps; /* vbios flags */ u32 class; /* vbios flags */ @@ -355,6 +362,8 @@ struct amdgpu_pm { struct amdgpu_ctx 
*stable_pstate_ctx; struct config_table_setting config_table; + /* runtime mode */ + enum amdgpu_runpm_mode rpm_mode; }; int amdgpu_dpm_read_sensor(struct amdgpu_device *adev, enum amd_pp_sensors sensor, -- cgit v1.2.3 From f746556aa9724d740c44861db0ac005ae50e1332 Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Thu, 14 Jul 2022 15:55:37 +0800 Subject: drm/amdgpu: skip SMU FW reloading in runpm BACO case SMU is always alive, so it's fine to skip SMU FW reloading when runpm resumed from BACO, this can avoid some race issues when resuming SMU. Suggested-by: Evan Quan Signed-off-by: Guchun Chen Reviewed-by: Lijo Lazar Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index e9411c28d88b..6540582ecbf8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -2348,6 +2348,13 @@ static int psp_load_smu_fw(struct psp_context *psp) &adev->firmware.ucode[AMDGPU_UCODE_ID_SMC]; struct amdgpu_ras *ras = psp->ras_context.ras; + /* + * Skip SMU FW reloading in case of using BACO for runpm only, + * as SMU is always alive. + */ + if (adev->in_runpm && (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO)) + return 0; + if (!ucode->fw || amdgpu_sriov_vf(psp->adev)) return 0; -- cgit v1.2.3 From 75a9ad8c1b7960794c16182fd90852918ecdc0a6 Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Thu, 14 Jul 2022 16:01:03 +0800 Subject: drm/amdgpu: drop runtime pm disablement quirk on several sienna cichlid cards This quirk is not needed any more as it's fixed by bypassing SMU FW reloading in runtime resume. 
Signed-off-by: Guchun Chen Reviewed-by: Lijo Lazar Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index fbbcdbdbe65a..a74fa195b379 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -43,17 +43,6 @@ #include "amdgpu_display.h" #include "amdgpu_ras.h" -static void amdgpu_runtime_pm_quirk(struct amdgpu_device *adev) -{ - /* - * Add below quirk on several sienna_cichlid cards to disable - * runtime pm to fix EMI failures. - */ - if (((adev->pdev->device == 0x73A1) && (adev->pdev->revision == 0x00)) || - ((adev->pdev->device == 0x73BF) && (adev->pdev->revision == 0xCF))) - adev->runpm = false; -} - void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev) { struct amdgpu_gpu_instance *gpu_instance; @@ -188,8 +177,6 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags) break; } - amdgpu_runtime_pm_quirk(adev); - if (adev->runpm) { adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; dev_info(adev->dev, "Using BACO for runtime pm\n"); -- cgit v1.2.3 From 9c913f38032e873758d1b13fce720a0c2cbbfcb7 Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Thu, 14 Jul 2022 16:37:29 +0800 Subject: drm/amdgpu: drop runpm from amdgpu_device structure It's redundant, as now switching to rpm_mode to indicate runtime power management mode. 
Suggested-by: Lijo Lazar Signed-off-by: Guchun Chen Reviewed-by: Lijo Lazar Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 10 +++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 23 ++++++++++------------- 3 files changed, 15 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 2871a3e3801f..3aa8ae1bc35a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1011,7 +1011,6 @@ struct amdgpu_device { uint64_t df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS]; /* enable runtime pm on the device */ - bool runpm; bool in_runpm; bool has_pr3; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index e3d139708160..4c66aff11a40 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2121,7 +2121,7 @@ retry_init: if (ret) DRM_ERROR("Creating debugfs files failed (%d).\n", ret); - if (adev->runpm) { + if (adev->pm.rpm_mode != AMDGPU_RUNPM_NONE) { /* only need to skip on ATPX */ if (amdgpu_device_supports_px(ddev)) dev_pm_set_driver_flags(ddev->dev, DPM_FLAG_NO_DIRECT_COMPLETE); @@ -2178,7 +2178,7 @@ amdgpu_pci_remove(struct pci_dev *pdev) drm_dev_unplug(dev); - if (adev->runpm) { + if (adev->pm.rpm_mode != AMDGPU_RUNPM_NONE) { pm_runtime_get_sync(dev->dev); pm_runtime_forbid(dev->dev); } @@ -2461,7 +2461,7 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) struct amdgpu_device *adev = drm_to_adev(drm_dev); int ret, i; - if (!adev->runpm) { + if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) { pm_runtime_forbid(dev); return -EBUSY; } @@ -2530,7 +2530,7 @@ static int amdgpu_pmops_runtime_resume(struct device *dev) struct amdgpu_device *adev = drm_to_adev(drm_dev); int ret; - if (!adev->runpm) + if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) return -EINVAL; /* Avoids registers access 
if device is physically gone */ @@ -2574,7 +2574,7 @@ static int amdgpu_pmops_runtime_idle(struct device *dev) /* we don't want the main rpm_idle to call suspend - we want to autosuspend */ int ret = 1; - if (!adev->runpm) { + if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) { pm_runtime_forbid(dev); return -EBUSY; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index a74fa195b379..b8ba59c93fc0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -147,14 +147,13 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags) goto out; } + adev->pm.rpm_mode = AMDGPU_RUNPM_NONE; if (amdgpu_device_supports_px(dev) && - (amdgpu_runtime_pm != 0)) { /* enable runpm by default for atpx */ - adev->runpm = true; + (amdgpu_runtime_pm != 0)) { /* enable PX as runtime mode */ adev->pm.rpm_mode = AMDGPU_RUNPM_PX; dev_info(adev->dev, "Using ATPX for runtime pm\n"); } else if (amdgpu_device_supports_boco(dev) && - (amdgpu_runtime_pm != 0)) { /* enable runpm by default for boco */ - adev->runpm = true; + (amdgpu_runtime_pm != 0)) { /* enable boco as runtime mode */ adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO; dev_info(adev->dev, "Using BOCO for runtime pm\n"); } else if (amdgpu_device_supports_baco(dev) && @@ -162,25 +161,23 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags) switch (adev->asic_type) { case CHIP_VEGA20: case CHIP_ARCTURUS: - /* enable runpm if runpm=1 */ + /* enable BACO as runpm mode if runpm=1 */ if (amdgpu_runtime_pm > 0) - adev->runpm = true; + adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; break; case CHIP_VEGA10: - /* turn runpm on if noretry=0 */ + /* enable BACO as runpm mode if noretry=0 */ if (!adev->gmc.noretry) - adev->runpm = true; + adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; break; default: - /* enable runpm on CI+ */ - adev->runpm = true; + /* enable BACO as runpm mode on CI+ */ + adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; break; } - if 
(adev->runpm) { - adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; + if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) dev_info(adev->dev, "Using BACO for runtime pm\n"); - } } /* Call ACPI methods: require modeset init -- cgit v1.2.3 From 5877b7ddbc2502a7ddbc07970cf5c15972c22de4 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Tue, 31 Aug 2021 13:32:40 +0800 Subject: drm/amdgpu: correct the PSP_BL_CMD enum To match with the enum defined in trusted os Signed-off-by: Hawking Zhang Reviewed-by: Le Ma Reviewed-by: John Clements Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index e431f4994931..180634616b0f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -69,8 +69,8 @@ enum psp_bootloader_cmd { PSP_BL__LOAD_SOSDRV = 0x20000, PSP_BL__LOAD_KEY_DATABASE = 0x80000, PSP_BL__LOAD_SOCDRV = 0xB0000, - PSP_BL__LOAD_INTFDRV = 0xC0000, - PSP_BL__LOAD_DBGDRV = 0xD0000, + PSP_BL__LOAD_DBGDRV = 0xC0000, + PSP_BL__LOAD_INTFDRV = 0xD0000, PSP_BL__DRAM_LONG_TRAIN = 0x100000, PSP_BL__DRAM_SHORT_TRAIN = 0x200000, PSP_BL__LOAD_TOS_SPL_TABLE = 0x10000000, -- cgit v1.2.3 From a53bc32182d7a2a934ca994239396fba21161c8e Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Sat, 16 Jul 2022 12:41:41 +0800 Subject: drm/amd/pm: enable mode1 reset for smu_v13_0_7 enable mode1 reset for smu_v13_0_7 since it's missing. 
Signed-off-by: Kenneth Feng Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc21.c | 1 + drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 765c3543ad18..00e9b7089feb 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -320,6 +320,7 @@ soc21_asic_reset_method(struct amdgpu_device *adev) switch (adev->ip_versions[MP1_HWIP][0]) { case IP_VERSION(13, 0, 0): + case IP_VERSION(13, 0, 7): return AMD_RESET_METHOD_MODE1; case IP_VERSION(13, 0, 4): return AMD_RESET_METHOD_MODE2; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 6259a85bc818..6f0548714566 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -118,6 +118,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_7_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(DramLogSetDramSize, PPSMC_MSG_DramLogSetDramSize, 0), MSG_MAP(AllowGfxOff, PPSMC_MSG_AllowGfxOff, 0), MSG_MAP(DisallowGfxOff, PPSMC_MSG_DisallowGfxOff, 0), + MSG_MAP(Mode1Reset, PPSMC_MSG_Mode1Reset, 0), MSG_MAP(PrepareMp1ForUnload, PPSMC_MSG_PrepareMp1ForUnload, 0), }; -- cgit v1.2.3 From 4528c18605654bea022d294c9ba2e93600d05a94 Mon Sep 17 00:00:00 2001 From: Ruijing Dong Date: Thu, 14 Jul 2022 17:13:37 -0400 Subject: drm/amdgpu: add comment to HW_IP_VCN_ENC type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From VCN4, AMDGPU_HW_IP_VCN_ENC is re-used to support both encoding and decoding jobs. 
Link: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/245/commits Reviewed-by: Christian König Reviewed-by: Leo Liu Signed-off-by: Ruijing Dong Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 18d3246d636e..63de71f53110 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -559,6 +559,10 @@ struct drm_amdgpu_gem_va { #define AMDGPU_HW_IP_VCE 4 #define AMDGPU_HW_IP_UVD_ENC 5 #define AMDGPU_HW_IP_VCN_DEC 6 +/* + * From VCN4, AMDGPU_HW_IP_VCN_ENC is re-used to support + * both encoding and decoding jobs. + */ #define AMDGPU_HW_IP_VCN_ENC 7 #define AMDGPU_HW_IP_VCN_JPEG 8 #define AMDGPU_HW_IP_NUM 9 -- cgit v1.2.3 From 5df79aeb6e0890d6f5fd6e12c7ad238a1617b210 Mon Sep 17 00:00:00 2001 From: Luben Tuikov Date: Wed, 20 Jul 2022 15:04:18 -0400 Subject: drm/amdgpu: Protect the amdgpu_bo_list list with a mutex v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Protect the struct amdgpu_bo_list with a mutex. This is used during command submission in order to avoid buffer object corruption as recorded in the link below. v2 (chk): Keep the mutex locked for the whole CS to avoid using the list from multiple CS threads at the same time.
Suggested-by: Christian König Cc: Alex Deucher Cc: Andrey Grodzovsky Cc: Vitaly Prosyak Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2048 Signed-off-by: Luben Tuikov Signed-off-by: Christian König Tested-by: Luben Tuikov Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 4 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 16 +++++++++++++--- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 714178f1b6c6..2168163aad2d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -40,7 +40,7 @@ static void amdgpu_bo_list_free_rcu(struct rcu_head *rcu) { struct amdgpu_bo_list *list = container_of(rcu, struct amdgpu_bo_list, rhead); - + mutex_destroy(&list->bo_list_mutex); kvfree(list); } @@ -136,6 +136,7 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, trace_amdgpu_cs_bo_status(list->num_entries, total_size); + mutex_init(&list->bo_list_mutex); *result = list; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h index 529d52a204cf..9caea1688fc3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h @@ -47,6 +47,10 @@ struct amdgpu_bo_list { struct amdgpu_bo *oa_obj; unsigned first_userptr; unsigned num_entries; + + /* Protect access during command submission. 
+ */ + struct mutex bo_list_mutex; }; int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index b28af04b0c3e..d8f1335bc68f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -519,6 +519,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, return r; } + mutex_lock(&p->bo_list->bo_list_mutex); + /* One for TTM and one for the CS job */ amdgpu_bo_list_for_each_entry(e, p->bo_list) e->tv.num_shared = 2; @@ -651,6 +653,7 @@ out_free_user_pages: kvfree(e->user_pages); e->user_pages = NULL; } + mutex_unlock(&p->bo_list->bo_list_mutex); } return r; } @@ -690,9 +693,11 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, { unsigned i; - if (error && backoff) + if (error && backoff) { ttm_eu_backoff_reservation(&parser->ticket, &parser->validated); + mutex_unlock(&parser->bo_list->bo_list_mutex); + } for (i = 0; i < parser->num_post_deps; i++) { drm_syncobj_put(parser->post_deps[i].syncobj); @@ -832,12 +837,16 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) continue; r = amdgpu_vm_bo_update(adev, bo_va, false); - if (r) + if (r) { + mutex_unlock(&p->bo_list->bo_list_mutex); return r; + } r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update); - if (r) + if (r) { + mutex_unlock(&p->bo_list->bo_list_mutex); return r; + } } r = amdgpu_vm_handle_moved(adev, vm); @@ -1278,6 +1287,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence); mutex_unlock(&p->adev->notifier_lock); + mutex_unlock(&p->bo_list->bo_list_mutex); return 0; -- cgit v1.2.3 From 86bd6706c404ea703c11071d8b600d46bb98873c Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Tue, 17 May 2022 12:19:06 -0500 Subject: drm/amdgpu: remove acc_size from reserve/unreserve mem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit TTM used to track the "acc_size" of all BOs internally. We needed to keep track of it in our memory reservation to avoid TTM running out of memory in its own accounting. However, that "acc_size" accounting has since been removed from TTM. Therefore we don't really need to track it any more. Signed-off-by: Alex Sierra Reviewed-by: Philip Yang Reviewed-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 60 +++++++----------------- 1 file changed, 17 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 581c7ae41102..d2d2b16c4d50 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -115,21 +115,12 @@ void amdgpu_amdkfd_reserve_system_mem(uint64_t size) * compromise that should work in most cases without reserving too * much memory for page tables unnecessarily (factor 16K, >> 14). */ -#define ESTIMATE_PT_SIZE(mem_size) max(((mem_size) >> 14), AMDGPU_VM_RESERVED_VRAM) - -static size_t amdgpu_amdkfd_acc_size(uint64_t size) -{ - size >>= PAGE_SHIFT; - size *= sizeof(dma_addr_t) + sizeof(void *); - return __roundup_pow_of_two(sizeof(struct amdgpu_bo)) + - __roundup_pow_of_two(sizeof(struct ttm_tt)) + - PAGE_ALIGN(size); -} +#define ESTIMATE_PT_SIZE(mem_size) max(((mem_size) >> 14), AMDGPU_VM_RESERVED_VRAM) /** * amdgpu_amdkfd_reserve_mem_limit() - Decrease available memory by size - * of buffer including any reserved for control structures + * of buffer. * * @adev: Device to which allocated BO belongs to * @size: Size of buffer, in bytes, encapsulated by B0. 
This should be @@ -143,19 +134,16 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, { uint64_t reserved_for_pt = ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size); - size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed; + size_t system_mem_needed, ttm_mem_needed, vram_needed; int ret = 0; - acc_size = amdgpu_amdkfd_acc_size(size); - + system_mem_needed = 0; + ttm_mem_needed = 0; vram_needed = 0; if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) { - system_mem_needed = acc_size + size; - ttm_mem_needed = acc_size + size; + system_mem_needed = size; + ttm_mem_needed = size; } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { - system_mem_needed = acc_size; - ttm_mem_needed = acc_size; - /* * Conservatively round up the allocation requirement to 2 MB * to avoid fragmentation caused by 4K allocations in the tail @@ -163,14 +151,10 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, */ vram_needed = ALIGN(size, VRAM_ALLOCATION_ALIGN); } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { - system_mem_needed = acc_size + size; - ttm_mem_needed = acc_size; - } else if (alloc_flag & - (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | - KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) { - system_mem_needed = acc_size; - ttm_mem_needed = acc_size; - } else { + system_mem_needed = size; + } else if (!(alloc_flag & + (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | + KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) { pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag); return -ENOMEM; } @@ -208,28 +192,18 @@ release: static void unreserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 alloc_flag) { - size_t acc_size; - - acc_size = amdgpu_amdkfd_acc_size(size); - spin_lock(&kfd_mem_limit.mem_limit_lock); if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) { - kfd_mem_limit.system_mem_used -= (acc_size + size); - kfd_mem_limit.ttm_mem_used -= (acc_size + size); + kfd_mem_limit.system_mem_used -= size; + kfd_mem_limit.ttm_mem_used -= size; } else if 
(alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { - kfd_mem_limit.system_mem_used -= acc_size; - kfd_mem_limit.ttm_mem_used -= acc_size; adev->kfd.vram_used -= ALIGN(size, VRAM_ALLOCATION_ALIGN); } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { - kfd_mem_limit.system_mem_used -= (acc_size + size); - kfd_mem_limit.ttm_mem_used -= acc_size; - } else if (alloc_flag & - (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | - KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) { - kfd_mem_limit.system_mem_used -= acc_size; - kfd_mem_limit.ttm_mem_used -= acc_size; - } else { + kfd_mem_limit.system_mem_used -= size; + } else if (!(alloc_flag & + (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | + KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) { pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag); goto release; } -- cgit v1.2.3 From dc2b9c70ebca8e5661d33a15ed2b99d4510e90be Mon Sep 17 00:00:00 2001 From: Gavin Wan Date: Mon, 18 Jul 2022 15:30:51 -0400 Subject: drm/amdgpu: fix scratch register access method in SRIOV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The scratch register should be accessed through MMIO instead of RLCG in SRIOV, since it is being used in the RLCG register access function.
Fixes: d54762cc3e6a ("drm/amdgpu: nuke dynamic gfx scratch reg allocation") Reviewed-by: Christian König Signed-off-by: Gavin Wan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 5349ca4d19e3..c6e0f9313a7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -987,23 +987,23 @@ static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0); uint32_t tmp = 0; unsigned i; int r; - WREG32_SOC15(GC, 0, mmSCRATCH_REG0, 0xCAFEDEAD); + WREG32(scratch, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); if (r) return r; amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); - amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0) - - PACKET3_SET_UCONFIG_REG_START); + amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START); amdgpu_ring_write(ring, 0xDEADBEEF); amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32_SOC15(GC, 0, mmSCRATCH_REG0); + tmp = RREG32(scratch); if (tmp == 0xDEADBEEF) break; udelay(1); -- cgit v1.2.3 From 40835624efcde7f984cb859035b95b5a526d1a9f Mon Sep 17 00:00:00 2001 From: Maíra Canal Date: Thu, 14 Jul 2022 13:44:56 -0300 Subject: drm/amdgpu: Write masked value to control register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On the dce_v6_0 and dce_v8_0 hpd tear down callback, the tmp variable should be written into the control register instead of 0. 
Reviewed-by: André Almeida Signed-off-by: Maíra Canal Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 3caf6f386042..77f5e998a120 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -339,7 +339,7 @@ static void dce_v6_0_hpd_fini(struct amdgpu_device *adev) tmp = RREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]); tmp &= ~DC_HPD1_CONTROL__DC_HPD1_EN_MASK; - WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], 0); + WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp); amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 7c75df5bffed..802e5c753271 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -333,7 +333,7 @@ static void dce_v8_0_hpd_fini(struct amdgpu_device *adev) tmp = RREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]); tmp &= ~DC_HPD1_CONTROL__DC_HPD1_EN_MASK; - WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], 0); + WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp); amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } -- cgit v1.2.3 From fb3e8ed0438347c63650850c011c99deb59d7e62 Mon Sep 17 00:00:00 2001 From: Maíra Canal Date: Thu, 14 Jul 2022 13:44:57 -0300 Subject: drm/amd/display: Change get_pipe_idx function scope MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Turn previously global function into a static function as it is not used outside the file. 
Reviewed-by: André Almeida Signed-off-by: Maíra Canal Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c | 2 +- drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c index 39f93072b5e0..c5a0a3649e9a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c @@ -251,7 +251,7 @@ unsigned int get_total_surface_size_in_mall_bytes( return size; } -unsigned int get_pipe_idx(struct display_mode_lib *mode_lib, unsigned int plane_idx) +static unsigned int get_pipe_idx(struct display_mode_lib *mode_lib, unsigned int plane_idx) { int pipe_idx = -1; int i; diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h index 47b149d4bfcf..6e61b5382361 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h @@ -165,7 +165,6 @@ unsigned int get_total_surface_size_in_mall_bytes( struct display_mode_lib *mode_lib, const display_e2e_pipe_params_st *pipes, unsigned int num_pipes); -unsigned int get_pipe_idx(struct display_mode_lib *mode_lib, unsigned int plane_idx); bool get_is_phantom_pipe(struct display_mode_lib *mode_lib, const display_e2e_pipe_params_st *pipes, -- cgit v1.2.3 From 615dc75fa6a7fc6cf029b01cdfc9d4b78919e71c Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Tue, 19 Jul 2022 21:15:29 +0800 Subject: drm/amd/pm: enable gfx ulv and gpo on smu_v13_0_7 enable gfx ulv and gpo on smu_v13_0_7 Signed-off-by: Kenneth Feng Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 6f0548714566..16eea2de8a2d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -251,6 +251,7 @@ smu_v13_0_7_get_allowed_feature_mask(struct smu_context *smu, if (adev->pm.pp_feature & PP_SCLK_DPM_MASK) { *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_GFXCLK_BIT); *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_GFX_IMU_BIT); + *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_GFX_POWER_OPTIMIZER_BIT); } if (adev->pm.pp_feature & PP_GFXOFF_MASK) @@ -271,6 +272,9 @@ smu_v13_0_7_get_allowed_feature_mask(struct smu_context *smu, if (adev->pm.pp_feature & PP_SCLK_DEEP_SLEEP_MASK) *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DS_GFXCLK_BIT); + if (adev->pm.pp_feature & PP_ULV_MASK) + *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_GFX_ULV_BIT); + *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DS_LCLK_BIT); *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_MP0CLK_BIT); *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_MM_DPM_BIT); -- cgit v1.2.3 From 8a9899c95d1cd709d441960ca325c6c8184978bb Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Wed, 10 Mar 2021 23:40:01 +0800 Subject: drm/amd/display: Support vertical interrupt 0 for all dcn ASIC [Why] When CONFIG_DRM_AMD_SECURE_DISPLAY is enabled, it will try to register vertical interrupt 0 for specific task. Currently, only dcn10 have defined relevant info for vertical interrupt 0. If we enable CONFIG_DRM_AMD_SECURE_DISPLAY for other dcn ASIC, will get DC_IRQ_SOURCE_INVALID while calling dc_interrupt_to_irq_source() and cause pointer errors. [How] Add support of vertical interrupt 0 for all dcn ASIC. 
Tested-by: Daniel Wheeler Acked-by: Alan Liu Signed-off-by: Wayne Lin Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c | 14 +++++++------- .../amd/display/dc/irq/dcn303/irq_service_dcn303.c | 19 +++++++++++++++++++ 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c b/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c index 146cd1819912..2aa74ee1502a 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c @@ -289,6 +289,13 @@ static const struct irq_source_info_funcs vline0_irq_info_funcs = { .funcs = &vblank_irq_info_funcs\ } +#define dmub_trace_int_entry()\ + [DC_IRQ_SOURCE_DMCUB_OUTBOX0] = {\ + IRQ_REG_ENTRY_DMUB(DMCUB_INTERRUPT_ENABLE, DMCUB_OUTBOX0_READY_INT_EN,\ + DMCUB_INTERRUPT_ACK, DMCUB_OUTBOX0_READY_INT_ACK),\ + .funcs = &dmub_trace_irq_info_funcs\ + } + #define vline0_int_entry(reg_num)\ [DC_IRQ_SOURCE_DC1_VLINE0 + reg_num] = {\ IRQ_REG_ENTRY(OTG, reg_num,\ @@ -297,13 +304,6 @@ static const struct irq_source_info_funcs vline0_irq_info_funcs = { .funcs = &vline0_irq_info_funcs\ } -#define dmub_trace_int_entry()\ - [DC_IRQ_SOURCE_DMCUB_OUTBOX0] = {\ - IRQ_REG_ENTRY_DMUB(DMCUB_INTERRUPT_ENABLE, DMCUB_OUTBOX0_READY_INT_EN,\ - DMCUB_INTERRUPT_ACK, DMCUB_OUTBOX0_READY_INT_ACK),\ - .funcs = &dmub_trace_irq_info_funcs\ - } - #define dummy_irq_entry() \ {\ .funcs = &dummy_irq_info_funcs\ diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn303/irq_service_dcn303.c b/drivers/gpu/drm/amd/display/dc/irq/dcn303/irq_service_dcn303.c index 66e60762388e..1d149d290147 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn303/irq_service_dcn303.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn303/irq_service_dcn303.c @@ -24,6 +24,10 @@ static enum dc_irq_source to_dal_irq_source_dcn303(struct irq_service *irq_servi return DC_IRQ_SOURCE_VBLANK1; case 
DCN_1_0__SRCID__DC_D2_OTG_VSTARTUP: return DC_IRQ_SOURCE_VBLANK2; + case DCN_1_0__SRCID__OTG1_VERTICAL_INTERRUPT0_CONTROL: + return DC_IRQ_SOURCE_DC1_VLINE0; + case DCN_1_0__SRCID__OTG2_VERTICAL_INTERRUPT0_CONTROL: + return DC_IRQ_SOURCE_DC2_VLINE0; case DCN_1_0__SRCID__HUBP0_FLIP_INTERRUPT: return DC_IRQ_SOURCE_PFLIP1; case DCN_1_0__SRCID__HUBP1_FLIP_INTERRUPT: @@ -96,6 +100,11 @@ static const struct irq_source_info_funcs vblank_irq_info_funcs = { .ack = NULL }; +static const struct irq_source_info_funcs vline0_irq_info_funcs = { + .set = NULL, + .ack = NULL +}; + #undef BASE_INNER #define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg @@ -164,6 +173,14 @@ static const struct irq_source_info_funcs vblank_irq_info_funcs = { .funcs = &vblank_irq_info_funcs\ } +#define vline0_int_entry(reg_num)\ + [DC_IRQ_SOURCE_DC1_VLINE0 + reg_num] = {\ + IRQ_REG_ENTRY(OTG, reg_num,\ + OTG_VERTICAL_INTERRUPT0_CONTROL, OTG_VERTICAL_INTERRUPT0_INT_ENABLE,\ + OTG_VERTICAL_INTERRUPT0_CONTROL, OTG_VERTICAL_INTERRUPT0_CLEAR),\ + .funcs = &vline0_irq_info_funcs\ + } + #define dummy_irq_entry() { .funcs = &dummy_irq_info_funcs } #define i2c_int_entry(reg_num) \ @@ -236,6 +253,8 @@ static const struct irq_source_info irq_source_info_dcn303[DAL_IRQ_SOURCES_NUMBE vupdate_no_lock_int_entry(1), vblank_int_entry(0), vblank_int_entry(1), + vline0_int_entry(0), + vline0_int_entry(1), }; static const struct irq_service_funcs irq_service_funcs_dcn303 = { -- cgit v1.2.3 From 0bf95a1ebd48866ad1438454061782d4d47765f7 Mon Sep 17 00:00:00 2001 From: Jun Lei Date: Wed, 25 Sep 2019 09:45:49 -0400 Subject: drm/amd/display: Remove unused variable Remove an unused variable "remove_disconnect_edp" which was a workaround bit. 
Tested-by: Daniel Wheeler Acked-by: Alan Liu Signed-off-by: Jun Lei Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 7c42377f0aae..be41f9fcf1dd 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -683,7 +683,6 @@ struct dc_debug_options { bool hdmi20_disable; bool skip_detection_link_training; uint32_t edid_read_retry_times; - bool remove_disconnect_edp; unsigned int force_odm_combine; //bit vector based on otg inst unsigned int seamless_boot_odm_combine; unsigned int force_odm_combine_4to1; //bit vector based on otg inst -- cgit v1.2.3 From 044b5cb9e8bfe4f006546fd98148e95489a6e803 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Tue, 27 Jul 2021 18:32:45 -0400 Subject: drm/amd/display: Update in dml Update DML to configure drr_display in vba struct. Tested-by: Daniel Wheeler Reviewed-by: Dmytro Laktyushkin Acked-by: Alan Liu Signed-off-by: Alvin Lee Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h | 1 + drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index 87c9b9f9976e..e8b094006d95 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -499,6 +499,7 @@ struct _vcs_dpi_display_pipe_dest_params_st { unsigned int refresh_rate; bool synchronize_timings; unsigned int odm_combine_policy; + bool drr_display; }; struct _vcs_dpi_display_pipe_params_st { diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c index c5a0a3649e9a..de78bb8489cb 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c +++ 
b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c @@ -697,6 +697,7 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib) mode_lib->vba.PixelClock[mode_lib->vba.NumberOfActivePlanes] = dst->pixel_rate_mhz; mode_lib->vba.PixelClockBackEnd[mode_lib->vba.NumberOfActivePlanes] = dst->pixel_rate_mhz; mode_lib->vba.DPPCLK[mode_lib->vba.NumberOfActivePlanes] = clks->dppclk_mhz; + mode_lib->vba.DRRDisplay[mode_lib->vba.NumberOfActiveSurfaces] = dst->drr_display; if (ip->is_line_buffer_bpp_fixed) mode_lib->vba.LBBitPerPixel[mode_lib->vba.NumberOfActivePlanes] = ip->line_buffer_fixed_bpp; -- cgit v1.2.3 From aec4706b0e70e0cab2d0650b63cf2ddc1b154352 Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Wed, 22 Jun 2022 10:59:01 +0800 Subject: drm/amd/display: Expose function reset_cur_dp_mst_topology [Why & How] Need to leverage this function out of dc_link.c. Change it to public. Tested-by: Daniel Wheeler Reviewed-by: Hersen Wu Acked-by: Alan Liu Signed-off-by: Wayne Lin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 2 +- drivers/gpu/drm/amd/display/dc/dc_link.h | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 858ee51f930a..ef54b96affa8 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -845,7 +845,7 @@ static bool discover_dp_mst_topology(struct dc_link *link, enum dc_detect_reason return link->type == dc_connection_mst_branch; } -static bool reset_cur_dp_mst_topology(struct dc_link *link) +bool reset_cur_dp_mst_topology(struct dc_link *link) { bool result = false; DC_LOGGER_INIT(link->ctx->logger); diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h index 023774b94da3..a0af0f6afeef 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_link.h +++ b/drivers/gpu/drm/amd/display/dc/dc_link.h @@ -514,4 +514,7 @@ bool 
dc_dp_trace_is_logged(struct dc_link *link, struct dp_trace_lt_counts *dc_dp_trace_get_lt_counts(struct dc_link *link, bool in_detection); unsigned int dc_dp_trace_get_link_loss_count(struct dc_link *link); + +/* Destruct the mst topology of the link and reset the allocated payload table */ +bool reset_cur_dp_mst_topology(struct dc_link *link); #endif /* DC_LINK_H_ */ -- cgit v1.2.3 From c8a58ce18ca36b62749e326411176554462a5e2c Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Tue, 17 Aug 2021 18:14:42 +0800 Subject: drm/amd/display: fix trigger_hotplug to support mst case [Why & How] Correct few problems below to have debugfs trigger_hotplug entry supports mst case * Adjust the place for acquiring the hpd_lock. We'll also access dc_link when simulate unplug * When detect the connector is a mst root, call reset_cur_dp_mst_topology() to simulate unplug * Don't support hotplug caused by CSN message since we can't change mst topology info directly. We can't simulate that * Clean up redundant code Tested-by: Daniel Wheeler Reviewed-by: Hersen Wu Acked-by: Alan Liu Signed-off-by: Wayne Lin Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index a1f40d0cd41c..e0ea350784e3 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -1256,14 +1256,22 @@ static ssize_t trigger_hotplug(struct file *f, const char __user *buf, return -EINVAL; } + kfree(wr_buf); + if (param_nums <= 0) { DRM_DEBUG_DRIVER("user data not be read\n"); - kfree(wr_buf); + return -EINVAL; + } + + mutex_lock(&aconnector->hpd_lock); + + /* Don't support for mst end device*/ + if (aconnector->mst_port) { + mutex_unlock(&aconnector->hpd_lock); return -EINVAL; } if (param[0] == 1) { - 
mutex_lock(&aconnector->hpd_lock); if (!dc_link_detect_sink(aconnector->dc_link, &new_connection_type) && new_connection_type != dc_connection_none) @@ -1300,6 +1308,10 @@ static ssize_t trigger_hotplug(struct file *f, const char __user *buf, amdgpu_dm_update_connector_after_detect(aconnector); + /* If the aconnector is the root node in mst topology */ + if (aconnector->mst_mgr.mst_state == true) + reset_cur_dp_mst_topology(link); + drm_modeset_lock_all(dev); dm_restore_drm_connector_state(dev, connector); drm_modeset_unlock_all(dev); @@ -1310,7 +1322,6 @@ static ssize_t trigger_hotplug(struct file *f, const char __user *buf, unlock: mutex_unlock(&aconnector->hpd_lock); - kfree(wr_buf); return size; } -- cgit v1.2.3 From 8b076fa7c5befd1d3e1d892ae466f5334e4c6c99 Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Tue, 17 Aug 2021 18:58:31 +0800 Subject: drm/amd/display: Add is_mst_connector debugfs entry [Why & How] Add "is_mst_connector" debugfs entry to help distinguish whether a connector is in a mst topology or not. 
Access it with the following command: cat /sys/kernel/debug/dri/0/DP-X/is_mst_connector Result: - "root" stands for the root connector of the topology - "branch" stands for branch device of the topology - "end" stands for leaf node connector of the topology - "no" stands for the connector is not a device of a mst topology Tested-by: Daniel Wheeler Reviewed-by: Hersen Wu Acked-by: Alan Liu Signed-off-by: Wayne Lin Signed-off-by: Alex Deucher --- .../drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 53 ++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index e0ea350784e3..4eecf052d08d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -2540,6 +2540,57 @@ static int target_backlight_show(struct seq_file *m, void *unused) return 0; } +/* + * function description: Determine if the connector is mst connector + * + * This function helps to determine whether a connector is a mst connector. 
+ * - "root" stands for the root connector of the topology + * - "branch" stands for branch device of the topology + * - "end" stands for leaf node connector of the topology + * - "no" stands for the connector is not a device of a mst topology + * Access it with the following command: + * + * cat /sys/kernel/debug/dri/0/DP-X/is_mst_connector + * + */ +static int dp_is_mst_connector_show(struct seq_file *m, void *unused) +{ + struct drm_connector *connector = m->private; + struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); + struct drm_dp_mst_topology_mgr *mgr = NULL; + struct drm_dp_mst_port *port = NULL; + char *role = NULL; + + mutex_lock(&aconnector->hpd_lock); + + if (aconnector->mst_mgr.mst_state) { + role = "root"; + } else if (aconnector->mst_port && + aconnector->mst_port->mst_mgr.mst_state) { + + role = "end"; + + mgr = &aconnector->mst_port->mst_mgr; + port = aconnector->port; + + drm_modeset_lock(&mgr->base.lock, NULL); + if (port->pdt == DP_PEER_DEVICE_MST_BRANCHING && + port->mcs) + role = "branch"; + drm_modeset_unlock(&mgr->base.lock); + + } else { + role = "no"; + } + + seq_printf(m, "%s\n", role); + + mutex_unlock(&aconnector->hpd_lock); + + return 0; +} + + DEFINE_SHOW_ATTRIBUTE(dp_dsc_fec_support); DEFINE_SHOW_ATTRIBUTE(dmub_fw_state); DEFINE_SHOW_ATTRIBUTE(dmub_tracebuffer); @@ -2549,6 +2600,7 @@ DEFINE_SHOW_ATTRIBUTE(hdcp_sink_capability); #endif DEFINE_SHOW_ATTRIBUTE(internal_display); DEFINE_SHOW_ATTRIBUTE(psr_capability); +DEFINE_SHOW_ATTRIBUTE(dp_is_mst_connector); static const struct file_operations dp_dsc_clock_en_debugfs_fops = { .owner = THIS_MODULE, @@ -2692,6 +2744,7 @@ static const struct { {"dp_dsc_fec_support", &dp_dsc_fec_support_fops}, {"max_bpc", &dp_max_bpc_debugfs_fops}, {"dsc_disable_passthrough", &dp_dsc_disable_passthrough_debugfs_fops}, + {"is_mst_connector", &dp_is_mst_connector_fops} }; #ifdef CONFIG_DRM_AMD_DC_HDCP -- cgit v1.2.3 From 25f7cde8bad9fe5943851d3d602e9fddb7977961 Mon Sep 17 00:00:00 
2001 From: Wayne Lin Date: Wed, 20 Jul 2022 15:11:56 -0400 Subject: drm/amd/display: Add tags for indicating mst progress status [Why & How] In order to leverage igt tool to maintain mst feature, expose new debugfs entry "mst_progress_status". In our dm flow, record down the result of each phase of mst and user can examine the mst result by checking whether each phase get completed successfully. Tested-by: Daniel Wheeler Reviewed-by: Hersen Wu Acked-by: Alan Liu Signed-off-by: Wayne Lin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 20 ++++++++++ .../drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 46 +++++++++++++++++++++- .../drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 18 ++++++++- .../amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 13 ++++++ 4 files changed, 94 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 7bd750e9f891..90b306a1dd68 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -571,6 +571,14 @@ struct dsc_preferred_settings { bool dsc_force_disable_passthrough; }; +enum mst_progress_status { + MST_STATUS_DEFAULT = 0, + MST_PROBE = BIT(0), + MST_REMOTE_EDID = BIT(1), + MST_ALLOCATE_NEW_PAYLOAD = BIT(2), + MST_CLEAR_ALLOCATED_PAYLOAD = BIT(3), +}; + struct amdgpu_dm_connector { struct drm_connector base; @@ -623,8 +631,20 @@ struct amdgpu_dm_connector { struct drm_display_mode freesync_vid_base; int psr_skip_count; + + /* Record progress status of mst*/ + uint8_t mst_status; }; +static inline void amdgpu_dm_set_mst_status(uint8_t *status, + uint8_t flags, bool set) +{ + if (set) + *status |= flags; + else + *status &= ~flags; +} + #define to_amdgpu_dm_connector(x) container_of(x, struct amdgpu_dm_connector, base) extern const struct amdgpu_ip_block_version dm_ip_block; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index 4eecf052d08d..aa4edf182095 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -50,6 +50,13 @@ struct dmub_debugfs_trace_entry { uint32_t param1; }; +static const char *const mst_progress_status[] = { + "probe", + "remote_edid", + "allocate_new_payload", + "clear_allocated_payload", +}; + /* parse_write_buffer_into_params - Helper function to parse debugfs write buffer into an array * * Function takes in attributes passed to debugfs write entry @@ -2590,6 +2597,41 @@ static int dp_is_mst_connector_show(struct seq_file *m, void *unused) return 0; } +/* + * function description: Read out the mst progress status + * + * This function helps to determine the mst progress status of + * a mst connector. + * + * Access it with the following command: + * + * cat /sys/kernel/debug/dri/0/DP-X/mst_progress_status + * + */ +static int dp_mst_progress_status_show(struct seq_file *m, void *unused) +{ + struct drm_connector *connector = m->private; + struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); + struct amdgpu_device *adev = drm_to_adev(connector->dev); + int i; + + mutex_lock(&aconnector->hpd_lock); + mutex_lock(&adev->dm.dc_lock); + + if (aconnector->mst_status == MST_STATUS_DEFAULT) { + seq_puts(m, "disabled\n"); + } else { + for (i = 0; i < sizeof(mst_progress_status)/sizeof(char *); i++) + seq_printf(m, "%s:%s\n", + mst_progress_status[i], + aconnector->mst_status & BIT(i) ? 
"done" : "not_done"); + } + + mutex_unlock(&adev->dm.dc_lock); + mutex_unlock(&aconnector->hpd_lock); + + return 0; +} DEFINE_SHOW_ATTRIBUTE(dp_dsc_fec_support); DEFINE_SHOW_ATTRIBUTE(dmub_fw_state); @@ -2601,6 +2643,7 @@ DEFINE_SHOW_ATTRIBUTE(hdcp_sink_capability); DEFINE_SHOW_ATTRIBUTE(internal_display); DEFINE_SHOW_ATTRIBUTE(psr_capability); DEFINE_SHOW_ATTRIBUTE(dp_is_mst_connector); +DEFINE_SHOW_ATTRIBUTE(dp_mst_progress_status); static const struct file_operations dp_dsc_clock_en_debugfs_fops = { .owner = THIS_MODULE, @@ -2744,7 +2787,8 @@ static const struct { {"dp_dsc_fec_support", &dp_dsc_fec_support_fops}, {"max_bpc", &dp_max_bpc_debugfs_fops}, {"dsc_disable_passthrough", &dp_dsc_disable_passthrough_debugfs_fops}, - {"is_mst_connector", &dp_is_mst_connector_fops} + {"is_mst_connector", &dp_is_mst_connector_fops}, + {"mst_progress_status", &dp_mst_progress_status_fops} }; #ifdef CONFIG_DRM_AMD_DC_HDCP diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 137645d40b72..d66e3cd64ebd 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -312,6 +312,8 @@ bool dm_helpers_dp_mst_send_payload_allocation( struct amdgpu_dm_connector *aconnector; struct drm_dp_mst_topology_mgr *mst_mgr; struct drm_dp_mst_port *mst_port; + enum mst_progress_status set_flag = MST_ALLOCATE_NEW_PAYLOAD; + enum mst_progress_status clr_flag = MST_CLEAR_ALLOCATED_PAYLOAD; aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context; @@ -325,8 +327,20 @@ bool dm_helpers_dp_mst_send_payload_allocation( if (!mst_mgr->mst_state) return false; - /* It's OK for this to fail */ - drm_dp_update_payload_part2(mst_mgr); + if (!enable) { + set_flag = MST_CLEAR_ALLOCATED_PAYLOAD; + clr_flag = MST_ALLOCATE_NEW_PAYLOAD; + } + + if (drm_dp_update_payload_part2(mst_mgr)) { + amdgpu_dm_set_mst_status(&aconnector->mst_status, + 
set_flag, false); + } else { + amdgpu_dm_set_mst_status(&aconnector->mst_status, + set_flag, true); + amdgpu_dm_set_mst_status(&aconnector->mst_status, + clr_flag, false); + } if (!enable) drm_dp_mst_deallocate_vcpi(mst_mgr, mst_port); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 1f722309cfdd..2e74ccf7df5b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -179,6 +179,8 @@ amdgpu_dm_mst_connector_early_unregister(struct drm_connector *connector) aconnector->dc_sink = NULL; aconnector->edid = NULL; } + + aconnector->mst_status = MST_STATUS_DEFAULT; drm_modeset_unlock(&root->mst_mgr.base.lock); } @@ -279,6 +281,9 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector) edid = drm_dp_mst_get_edid(connector, &aconnector->mst_port->mst_mgr, aconnector->port); if (!edid) { + amdgpu_dm_set_mst_status(&aconnector->mst_status, + MST_REMOTE_EDID, false); + drm_connector_update_edid_property( &aconnector->base, NULL); @@ -309,6 +314,8 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector) } aconnector->edid = edid; + amdgpu_dm_set_mst_status(&aconnector->mst_status, + MST_REMOTE_EDID, true); } if (aconnector->dc_sink && aconnector->dc_sink->sink_signal == SIGNAL_TYPE_VIRTUAL) { @@ -430,6 +437,10 @@ dm_dp_mst_detect(struct drm_connector *connector, dc_sink_release(aconnector->dc_sink); aconnector->dc_sink = NULL; aconnector->edid = NULL; + + amdgpu_dm_set_mst_status(&aconnector->mst_status, + MST_REMOTE_EDID | MST_ALLOCATE_NEW_PAYLOAD | MST_CLEAR_ALLOCATED_PAYLOAD, + false); } return connection_status; @@ -526,6 +537,8 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr, connector = &aconnector->base; aconnector->port = port; aconnector->mst_port = master; + amdgpu_dm_set_mst_status(&aconnector->mst_status, + MST_PROBE, true); if (drm_connector_init( dev, -- 
cgit v1.2.3 From 5d945cbcd4b16a29d6470a80dfb19738f9a4319f Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Wed, 20 Jul 2022 15:31:42 -0400 Subject: drm/amd/display: Create a file dedicated to planes [Why] The amdgpu_dm file contains most of the code that works as an interface between DRM API and DC. As a result, this file becomes very large since it comprises multiple abstractions such as plane manipulation. [How] This commit extracts the plane code to its specific file named amdgpu_dm_plane. This change does not change anything inside the functions; the only exception is converting some static functions to a global function. Tested-by: Daniel Wheeler Reviewed-by: Harry Wentland Acked-by: Alan Liu Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/Makefile | 7 +- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2220 ++++---------------- .../drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 1645 +++++++++++++++ .../drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h | 73 + 4 files changed, 2103 insertions(+), 1842 deletions(-) create mode 100644 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c create mode 100644 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile index 718e123a3230..ec559ea902a3 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile @@ -25,7 +25,12 @@ -AMDGPUDM = amdgpu_dm.o amdgpu_dm_irq.o amdgpu_dm_mst_types.o amdgpu_dm_color.o +AMDGPUDM = \ + amdgpu_dm.o \ + amdgpu_dm_plane.o \ + amdgpu_dm_irq.o \ + amdgpu_dm_mst_types.o \ + amdgpu_dm_color.o ifdef CONFIG_DRM_AMD_DC_DCN AMDGPUDM += dc_fpu.o diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 3e83fed540e8..8a1821a2551c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -46,6 +46,7 @@ #include "amdgpu_ucode.h" #include "atom.h" #include "amdgpu_dm.h" +#include "amdgpu_dm_plane.h" #ifdef CONFIG_DRM_AMD_DC_HDCP #include "amdgpu_dm_hdcp.h" #include @@ -206,10 +207,6 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev); /* removes and deallocates the drm structures, created by the above function */ static void amdgpu_dm_destroy_drm_device(struct amdgpu_display_manager *dm); -static int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, - struct drm_plane *plane, - unsigned long possible_crtcs, - const struct dc_plane_cap *plane_cap); static int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, struct drm_plane *plane, uint32_t link_index); @@ -228,12 +225,6 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state); static int amdgpu_dm_atomic_check(struct drm_device *dev, struct drm_atomic_state *state); -static void handle_cursor_update(struct drm_plane *plane, - struct drm_plane_state *old_plane_state); - -static const struct drm_format_info * -amd_get_format_info(const struct drm_mode_fb_cmd2 *cmd); - static void handle_hpd_irq_helper(struct amdgpu_dm_connector *aconnector); static void handle_hpd_rx_irq(void *param); @@ -4710,1147 +4701,265 @@ static const struct drm_encoder_funcs amdgpu_dm_encoder_funcs = { .destroy = amdgpu_dm_encoder_destroy, }; - -static void get_min_max_dc_plane_scaling(struct drm_device *dev, - struct drm_framebuffer *fb, - int *min_downscale, int *max_upscale) +static int +fill_plane_color_attributes(const struct drm_plane_state *plane_state, + const enum surface_pixel_format format, + enum dc_color_space *color_space) { - struct amdgpu_device *adev = drm_to_adev(dev); - struct dc *dc = adev->dm.dc; - /* Caps for all supported planes are the same on DCE and DCN 1 - 3 */ - struct dc_plane_cap *plane_cap = &dc->caps.planes[0]; + bool full_range; - switch (fb->format->format) { - case DRM_FORMAT_P010: - case 
DRM_FORMAT_NV12: - case DRM_FORMAT_NV21: - *max_upscale = plane_cap->max_upscale_factor.nv12; - *min_downscale = plane_cap->max_downscale_factor.nv12; + *color_space = COLOR_SPACE_SRGB; + + /* DRM color properties only affect non-RGB formats. */ + if (format < SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) + return 0; + + full_range = (plane_state->color_range == DRM_COLOR_YCBCR_FULL_RANGE); + + switch (plane_state->color_encoding) { + case DRM_COLOR_YCBCR_BT601: + if (full_range) + *color_space = COLOR_SPACE_YCBCR601; + else + *color_space = COLOR_SPACE_YCBCR601_LIMITED; break; - case DRM_FORMAT_XRGB16161616F: - case DRM_FORMAT_ARGB16161616F: - case DRM_FORMAT_XBGR16161616F: - case DRM_FORMAT_ABGR16161616F: - *max_upscale = plane_cap->max_upscale_factor.fp16; - *min_downscale = plane_cap->max_downscale_factor.fp16; + case DRM_COLOR_YCBCR_BT709: + if (full_range) + *color_space = COLOR_SPACE_YCBCR709; + else + *color_space = COLOR_SPACE_YCBCR709_LIMITED; break; - default: - *max_upscale = plane_cap->max_upscale_factor.argb8888; - *min_downscale = plane_cap->max_downscale_factor.argb8888; + case DRM_COLOR_YCBCR_BT2020: + if (full_range) + *color_space = COLOR_SPACE_2020_YCBCR; + else + return -EINVAL; break; - } - /* - * A factor of 1 in the plane_cap means to not allow scaling, ie. use a - * scaling factor of 1.0 == 1000 units. 
- */ - if (*max_upscale == 1) - *max_upscale = 1000; + default: + return -EINVAL; + } - if (*min_downscale == 1) - *min_downscale = 1000; + return 0; } - -static int fill_dc_scaling_info(struct amdgpu_device *adev, - const struct drm_plane_state *state, - struct dc_scaling_info *scaling_info) +static int +fill_dc_plane_info_and_addr(struct amdgpu_device *adev, + const struct drm_plane_state *plane_state, + const uint64_t tiling_flags, + struct dc_plane_info *plane_info, + struct dc_plane_address *address, + bool tmz_surface, + bool force_disable_dcc) { - int scale_w, scale_h, min_downscale, max_upscale; - - memset(scaling_info, 0, sizeof(*scaling_info)); + const struct drm_framebuffer *fb = plane_state->fb; + const struct amdgpu_framebuffer *afb = + to_amdgpu_framebuffer(plane_state->fb); + int ret; - /* Source is fixed 16.16 but we ignore mantissa for now... */ - scaling_info->src_rect.x = state->src_x >> 16; - scaling_info->src_rect.y = state->src_y >> 16; + memset(plane_info, 0, sizeof(*plane_info)); - /* - * For reasons we don't (yet) fully understand a non-zero - * src_y coordinate into an NV12 buffer can cause a - * system hang on DCN1x. - * To avoid hangs (and maybe be overly cautious) - * let's reject both non-zero src_x and src_y. - * - * We currently know of only one use-case to reproduce a - * scenario with non-zero src_x and src_y for NV12, which - * is to gesture the YouTube Android app into full screen - * on ChromeOS. 
- */ - if (((adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 0)) || - (adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 1))) && - (state->fb && state->fb->format->format == DRM_FORMAT_NV12 && - (scaling_info->src_rect.x != 0 || scaling_info->src_rect.y != 0))) + switch (fb->format->format) { + case DRM_FORMAT_C8: + plane_info->format = + SURFACE_PIXEL_FORMAT_GRPH_PALETA_256_COLORS; + break; + case DRM_FORMAT_RGB565: + plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_RGB565; + break; + case DRM_FORMAT_XRGB8888: + case DRM_FORMAT_ARGB8888: + plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ARGB8888; + break; + case DRM_FORMAT_XRGB2101010: + case DRM_FORMAT_ARGB2101010: + plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ARGB2101010; + break; + case DRM_FORMAT_XBGR2101010: + case DRM_FORMAT_ABGR2101010: + plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010; + break; + case DRM_FORMAT_XBGR8888: + case DRM_FORMAT_ABGR8888: + plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ABGR8888; + break; + case DRM_FORMAT_NV21: + plane_info->format = SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr; + break; + case DRM_FORMAT_NV12: + plane_info->format = SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb; + break; + case DRM_FORMAT_P010: + plane_info->format = SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb; + break; + case DRM_FORMAT_XRGB16161616F: + case DRM_FORMAT_ARGB16161616F: + plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F; + break; + case DRM_FORMAT_XBGR16161616F: + case DRM_FORMAT_ABGR16161616F: + plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F; + break; + case DRM_FORMAT_XRGB16161616: + case DRM_FORMAT_ARGB16161616: + plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616; + break; + case DRM_FORMAT_XBGR16161616: + case DRM_FORMAT_ABGR16161616: + plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616; + break; + default: + DRM_ERROR( + "Unsupported screen format %p4cc\n", + &fb->format->format); return -EINVAL; + } - scaling_info->src_rect.width = state->src_w >> 
16; - if (scaling_info->src_rect.width == 0) - return -EINVAL; + switch (plane_state->rotation & DRM_MODE_ROTATE_MASK) { + case DRM_MODE_ROTATE_0: + plane_info->rotation = ROTATION_ANGLE_0; + break; + case DRM_MODE_ROTATE_90: + plane_info->rotation = ROTATION_ANGLE_90; + break; + case DRM_MODE_ROTATE_180: + plane_info->rotation = ROTATION_ANGLE_180; + break; + case DRM_MODE_ROTATE_270: + plane_info->rotation = ROTATION_ANGLE_270; + break; + default: + plane_info->rotation = ROTATION_ANGLE_0; + break; + } - scaling_info->src_rect.height = state->src_h >> 16; - if (scaling_info->src_rect.height == 0) - return -EINVAL; - scaling_info->dst_rect.x = state->crtc_x; - scaling_info->dst_rect.y = state->crtc_y; + plane_info->visible = true; + plane_info->stereo_format = PLANE_STEREO_FORMAT_NONE; - if (state->crtc_w == 0) - return -EINVAL; + plane_info->layer_index = 0; - scaling_info->dst_rect.width = state->crtc_w; + ret = fill_plane_color_attributes(plane_state, plane_info->format, + &plane_info->color_space); + if (ret) + return ret; - if (state->crtc_h == 0) - return -EINVAL; + ret = fill_plane_buffer_attributes(adev, afb, plane_info->format, + plane_info->rotation, tiling_flags, + &plane_info->tiling_info, + &plane_info->plane_size, + &plane_info->dcc, address, + tmz_surface, force_disable_dcc); + if (ret) + return ret; - scaling_info->dst_rect.height = state->crtc_h; + fill_blending_from_plane_state( + plane_state, &plane_info->per_pixel_alpha, &plane_info->pre_multiplied_alpha, + &plane_info->global_alpha, &plane_info->global_alpha_value); - /* DRM doesn't specify clipping on destination output. 
*/ - scaling_info->clip_rect = scaling_info->dst_rect; + return 0; +} - /* Validate scaling per-format with DC plane caps */ - if (state->plane && state->plane->dev && state->fb) { - get_min_max_dc_plane_scaling(state->plane->dev, state->fb, - &min_downscale, &max_upscale); - } else { - min_downscale = 250; - max_upscale = 16000; - } +static int fill_dc_plane_attributes(struct amdgpu_device *adev, + struct dc_plane_state *dc_plane_state, + struct drm_plane_state *plane_state, + struct drm_crtc_state *crtc_state) +{ + struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(crtc_state); + struct amdgpu_framebuffer *afb = (struct amdgpu_framebuffer *)plane_state->fb; + struct dc_scaling_info scaling_info; + struct dc_plane_info plane_info; + int ret; + bool force_disable_dcc = false; - scale_w = scaling_info->dst_rect.width * 1000 / - scaling_info->src_rect.width; + ret = fill_dc_scaling_info(adev, plane_state, &scaling_info); + if (ret) + return ret; - if (scale_w < min_downscale || scale_w > max_upscale) - return -EINVAL; + dc_plane_state->src_rect = scaling_info.src_rect; + dc_plane_state->dst_rect = scaling_info.dst_rect; + dc_plane_state->clip_rect = scaling_info.clip_rect; + dc_plane_state->scaling_quality = scaling_info.scaling_quality; - scale_h = scaling_info->dst_rect.height * 1000 / - scaling_info->src_rect.height; + force_disable_dcc = adev->asic_type == CHIP_RAVEN && adev->in_suspend; + ret = fill_dc_plane_info_and_addr(adev, plane_state, + afb->tiling_flags, + &plane_info, + &dc_plane_state->address, + afb->tmz_surface, + force_disable_dcc); + if (ret) + return ret; - if (scale_h < min_downscale || scale_h > max_upscale) - return -EINVAL; + dc_plane_state->format = plane_info.format; + dc_plane_state->color_space = plane_info.color_space; + dc_plane_state->format = plane_info.format; + dc_plane_state->plane_size = plane_info.plane_size; + dc_plane_state->rotation = plane_info.rotation; + dc_plane_state->horizontal_mirror = plane_info.horizontal_mirror; + 
dc_plane_state->stereo_format = plane_info.stereo_format; + dc_plane_state->tiling_info = plane_info.tiling_info; + dc_plane_state->visible = plane_info.visible; + dc_plane_state->per_pixel_alpha = plane_info.per_pixel_alpha; + dc_plane_state->pre_multiplied_alpha = plane_info.pre_multiplied_alpha; + dc_plane_state->global_alpha = plane_info.global_alpha; + dc_plane_state->global_alpha_value = plane_info.global_alpha_value; + dc_plane_state->dcc = plane_info.dcc; + dc_plane_state->layer_index = plane_info.layer_index; // Always returns 0 + dc_plane_state->flip_int_enabled = true; /* - * The "scaling_quality" can be ignored for now, quality = 0 has DC - * assume reasonable defaults based on the format. + * Always set input transfer function, since plane state is refreshed + * every time. */ + ret = amdgpu_dm_update_plane_color_mgmt(dm_crtc_state, dc_plane_state); + if (ret) + return ret; return 0; } -static void -fill_gfx8_tiling_info_from_flags(union dc_tiling_info *tiling_info, - uint64_t tiling_flags) -{ - /* Fill GFX8 params */ - if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == DC_ARRAY_2D_TILED_THIN1) { - unsigned int bankw, bankh, mtaspect, tile_split, num_banks; - - bankw = AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH); - bankh = AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT); - mtaspect = AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT); - tile_split = AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT); - num_banks = AMDGPU_TILING_GET(tiling_flags, NUM_BANKS); - - /* XXX fix me for VI */ - tiling_info->gfx8.num_banks = num_banks; - tiling_info->gfx8.array_mode = - DC_ARRAY_2D_TILED_THIN1; - tiling_info->gfx8.tile_split = tile_split; - tiling_info->gfx8.bank_width = bankw; - tiling_info->gfx8.bank_height = bankh; - tiling_info->gfx8.tile_aspect = mtaspect; - tiling_info->gfx8.tile_mode = - DC_ADDR_SURF_MICRO_TILING_DISPLAY; - } else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) - == DC_ARRAY_1D_TILED_THIN1) { - tiling_info->gfx8.array_mode = 
DC_ARRAY_1D_TILED_THIN1; - } - - tiling_info->gfx8.pipe_config = - AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG); -} +/** + * fill_dc_dirty_rects() - Fill DC dirty regions for PSR selective updates + * + * @plane: DRM plane containing dirty regions that need to be flushed to the eDP + * remote fb + * @old_plane_state: Old state of @plane + * @new_plane_state: New state of @plane + * @crtc_state: New state of CRTC connected to the @plane + * @flip_addrs: DC flip tracking struct, which also tracks dirty rects + * + * For PSR SU, DC informs the DMUB uController of dirty rectangle regions + * (referred to as "damage clips" in DRM nomenclature) that require updating on + * the eDP remote buffer. The responsibility of specifying the dirty regions is + * amdgpu_dm's. + * + * A damage-aware DRM client should fill the FB_DAMAGE_CLIPS property on the + * plane with regions that require flushing to the eDP remote buffer. In + * addition, certain use cases - such as cursor and multi-plane overlay (MPO) - + * implicitly provide damage clips without any client support via the plane + * bounds. + * + * Today, amdgpu_dm only supports the MPO and cursor usecase.
+ * + * TODO: Also enable for FB_DAMAGE_CLIPS + */ +static void fill_dc_dirty_rects(struct drm_plane *plane, + struct drm_plane_state *old_plane_state, + struct drm_plane_state *new_plane_state, + struct drm_crtc_state *crtc_state, + struct dc_flip_addrs *flip_addrs) +{ + struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(crtc_state); + struct rect *dirty_rects = flip_addrs->dirty_rects; + uint32_t num_clips; + bool bb_changed; + bool fb_changed; + uint32_t i = 0; -static void -fill_gfx9_tiling_info_from_device(const struct amdgpu_device *adev, - union dc_tiling_info *tiling_info) -{ - tiling_info->gfx9.num_pipes = - adev->gfx.config.gb_addr_config_fields.num_pipes; - tiling_info->gfx9.num_banks = - adev->gfx.config.gb_addr_config_fields.num_banks; - tiling_info->gfx9.pipe_interleave = - adev->gfx.config.gb_addr_config_fields.pipe_interleave_size; - tiling_info->gfx9.num_shader_engines = - adev->gfx.config.gb_addr_config_fields.num_se; - tiling_info->gfx9.max_compressed_frags = - adev->gfx.config.gb_addr_config_fields.max_compress_frags; - tiling_info->gfx9.num_rb_per_se = - adev->gfx.config.gb_addr_config_fields.num_rb_per_se; - tiling_info->gfx9.shaderEnable = 1; - if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0)) - tiling_info->gfx9.num_pkrs = adev->gfx.config.gb_addr_config_fields.num_pkrs; -} - -static int -validate_dcc(struct amdgpu_device *adev, - const enum surface_pixel_format format, - const enum dc_rotation_angle rotation, - const union dc_tiling_info *tiling_info, - const struct dc_plane_dcc_param *dcc, - const struct dc_plane_address *address, - const struct plane_size *plane_size) -{ - struct dc *dc = adev->dm.dc; - struct dc_dcc_surface_param input; - struct dc_surface_dcc_cap output; - - memset(&input, 0, sizeof(input)); - memset(&output, 0, sizeof(output)); - - if (!dcc->enable) - return 0; - - if (format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || - !dc->cap_funcs.get_dcc_compression_cap) - return -EINVAL; - - input.format = format; - 
input.surface_size.width = plane_size->surface_size.width; - input.surface_size.height = plane_size->surface_size.height; - input.swizzle_mode = tiling_info->gfx9.swizzle; - - if (rotation == ROTATION_ANGLE_0 || rotation == ROTATION_ANGLE_180) - input.scan = SCAN_DIRECTION_HORIZONTAL; - else if (rotation == ROTATION_ANGLE_90 || rotation == ROTATION_ANGLE_270) - input.scan = SCAN_DIRECTION_VERTICAL; - - if (!dc->cap_funcs.get_dcc_compression_cap(dc, &input, &output)) - return -EINVAL; - - if (!output.capable) - return -EINVAL; - - if (dcc->independent_64b_blks == 0 && - output.grph.rgb.independent_64b_blks != 0) - return -EINVAL; - - return 0; -} - -static bool -modifier_has_dcc(uint64_t modifier) -{ - return IS_AMD_FMT_MOD(modifier) && AMD_FMT_MOD_GET(DCC, modifier); -} - -static unsigned -modifier_gfx9_swizzle_mode(uint64_t modifier) -{ - if (modifier == DRM_FORMAT_MOD_LINEAR) - return 0; - - return AMD_FMT_MOD_GET(TILE, modifier); -} - -static const struct drm_format_info * -amd_get_format_info(const struct drm_mode_fb_cmd2 *cmd) -{ - return amdgpu_lookup_format_info(cmd->pixel_format, cmd->modifier[0]); -} - -static void -fill_gfx9_tiling_info_from_modifier(const struct amdgpu_device *adev, - union dc_tiling_info *tiling_info, - uint64_t modifier) -{ - unsigned int mod_bank_xor_bits = AMD_FMT_MOD_GET(BANK_XOR_BITS, modifier); - unsigned int mod_pipe_xor_bits = AMD_FMT_MOD_GET(PIPE_XOR_BITS, modifier); - unsigned int pkrs_log2 = AMD_FMT_MOD_GET(PACKERS, modifier); - unsigned int pipes_log2; - - pipes_log2 = min(5u, mod_pipe_xor_bits); - - fill_gfx9_tiling_info_from_device(adev, tiling_info); - - if (!IS_AMD_FMT_MOD(modifier)) - return; - - tiling_info->gfx9.num_pipes = 1u << pipes_log2; - tiling_info->gfx9.num_shader_engines = 1u << (mod_pipe_xor_bits - pipes_log2); - - if (adev->family >= AMDGPU_FAMILY_NV) { - tiling_info->gfx9.num_pkrs = 1u << pkrs_log2; - } else { - tiling_info->gfx9.num_banks = 1u << mod_bank_xor_bits; - - /* for DCC we know it isn't rb 
aligned, so rb_per_se doesn't matter. */ - } -} - -enum dm_micro_swizzle { - MICRO_SWIZZLE_Z = 0, - MICRO_SWIZZLE_S = 1, - MICRO_SWIZZLE_D = 2, - MICRO_SWIZZLE_R = 3 -}; - -static bool dm_plane_format_mod_supported(struct drm_plane *plane, - uint32_t format, - uint64_t modifier) -{ - struct amdgpu_device *adev = drm_to_adev(plane->dev); - const struct drm_format_info *info = drm_format_info(format); - int i; - - enum dm_micro_swizzle microtile = modifier_gfx9_swizzle_mode(modifier) & 3; - - if (!info) - return false; - - /* - * We always have to allow these modifiers: - * 1. Core DRM checks for LINEAR support if userspace does not provide modifiers. - * 2. Not passing any modifiers is the same as explicitly passing INVALID. - */ - if (modifier == DRM_FORMAT_MOD_LINEAR || - modifier == DRM_FORMAT_MOD_INVALID) { - return true; - } - - /* Check that the modifier is on the list of the plane's supported modifiers. */ - for (i = 0; i < plane->modifier_count; i++) { - if (modifier == plane->modifiers[i]) - break; - } - if (i == plane->modifier_count) - return false; - - /* - * For D swizzle the canonical modifier depends on the bpp, so check - * it here. - */ - if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) == AMD_FMT_MOD_TILE_VER_GFX9 && - adev->family >= AMDGPU_FAMILY_NV) { - if (microtile == MICRO_SWIZZLE_D && info->cpp[0] == 4) - return false; - } - - if (adev->family >= AMDGPU_FAMILY_RV && microtile == MICRO_SWIZZLE_D && - info->cpp[0] < 8) - return false; - - if (modifier_has_dcc(modifier)) { - /* Per radeonsi comments 16/64 bpp are more complicated. */ - if (info->cpp[0] != 4) - return false; - /* We support multi-planar formats, but not when combined with - * additional DCC metadata planes. 
*/ - if (info->num_planes > 1) - return false; - } - - return true; -} - -static void -add_modifier(uint64_t **mods, uint64_t *size, uint64_t *cap, uint64_t mod) -{ - if (!*mods) - return; - - if (*cap - *size < 1) { - uint64_t new_cap = *cap * 2; - uint64_t *new_mods = kmalloc(new_cap * sizeof(uint64_t), GFP_KERNEL); - - if (!new_mods) { - kfree(*mods); - *mods = NULL; - return; - } - - memcpy(new_mods, *mods, sizeof(uint64_t) * *size); - kfree(*mods); - *mods = new_mods; - *cap = new_cap; - } - - (*mods)[*size] = mod; - *size += 1; -} - -static void -add_gfx9_modifiers(const struct amdgpu_device *adev, - uint64_t **mods, uint64_t *size, uint64_t *capacity) -{ - int pipes = ilog2(adev->gfx.config.gb_addr_config_fields.num_pipes); - int pipe_xor_bits = min(8, pipes + - ilog2(adev->gfx.config.gb_addr_config_fields.num_se)); - int bank_xor_bits = min(8 - pipe_xor_bits, - ilog2(adev->gfx.config.gb_addr_config_fields.num_banks)); - int rb = ilog2(adev->gfx.config.gb_addr_config_fields.num_se) + - ilog2(adev->gfx.config.gb_addr_config_fields.num_rb_per_se); - - - if (adev->family == AMDGPU_FAMILY_RV) { - /* Raven2 and later */ - bool has_constant_encode = adev->asic_type > CHIP_RAVEN || adev->external_rev_id >= 0x81; - - /* - * No _D DCC swizzles yet because we only allow 32bpp, which - * doesn't support _D on DCN - */ - - if (has_constant_encode) { - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits) | - AMD_FMT_MOD_SET(DCC, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | - AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) | - AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1)); - } - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) | 
- AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits) | - AMD_FMT_MOD_SET(DCC, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | - AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) | - AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 0)); - - if (has_constant_encode) { - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits) | - AMD_FMT_MOD_SET(DCC, 1) | - AMD_FMT_MOD_SET(DCC_RETILE, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | - AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) | - - AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | - AMD_FMT_MOD_SET(RB, rb) | - AMD_FMT_MOD_SET(PIPE, pipes)); - } - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits) | - AMD_FMT_MOD_SET(DCC, 1) | - AMD_FMT_MOD_SET(DCC_RETILE, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | - AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) | - AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 0) | - AMD_FMT_MOD_SET(RB, rb) | - AMD_FMT_MOD_SET(PIPE, pipes)); - } - - /* - * Only supported for 64bpp on Raven, will be filtered on format in - * dm_plane_format_mod_supported. 
- */ - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits)); - - if (adev->family == AMDGPU_FAMILY_RV) { - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits)); - } - - /* - * Only supported for 64bpp on Raven, will be filtered on format in - * dm_plane_format_mod_supported. - */ - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9)); - - if (adev->family == AMDGPU_FAMILY_RV) { - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9)); - } -} - -static void -add_gfx10_1_modifiers(const struct amdgpu_device *adev, - uint64_t **mods, uint64_t *size, uint64_t *capacity) -{ - int pipe_xor_bits = ilog2(adev->gfx.config.gb_addr_config_fields.num_pipes); - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(DCC, 1) | - AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | - AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B)); - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(DCC, 1) | - AMD_FMT_MOD_SET(DCC_RETILE, 1) | - 
AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | - AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B)); - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits)); - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits)); - - - /* Only supported for 64bpp, will be filtered in dm_plane_format_mod_supported */ - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9)); - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9)); -} - -static void -add_gfx10_3_modifiers(const struct amdgpu_device *adev, - uint64_t **mods, uint64_t *size, uint64_t *capacity) -{ - int pipe_xor_bits = ilog2(adev->gfx.config.gb_addr_config_fields.num_pipes); - int pkrs = ilog2(adev->gfx.config.gb_addr_config_fields.num_pkrs); - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(PACKERS, pkrs) | - AMD_FMT_MOD_SET(DCC, 1) | - AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) | - AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B)); - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) | - 
AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(PACKERS, pkrs) | - AMD_FMT_MOD_SET(DCC, 1) | - AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) | - AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B)); - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(PACKERS, pkrs) | - AMD_FMT_MOD_SET(DCC, 1) | - AMD_FMT_MOD_SET(DCC_RETILE, 1) | - AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) | - AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B)); - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(PACKERS, pkrs) | - AMD_FMT_MOD_SET(DCC, 1) | - AMD_FMT_MOD_SET(DCC_RETILE, 1) | - AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) | - AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B)); - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(PACKERS, pkrs)); - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(PACKERS, pkrs)); - - /* Only supported for 64bpp, will be filtered in dm_plane_format_mod_supported */ - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, 
AMD_FMT_MOD_TILE_GFX9_64K_D) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9)); - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9)); -} - -static void -add_gfx11_modifiers(struct amdgpu_device *adev, - uint64_t **mods, uint64_t *size, uint64_t *capacity) -{ - int num_pipes = 0; - int pipe_xor_bits = 0; - int num_pkrs = 0; - int pkrs = 0; - u32 gb_addr_config; - u8 i = 0; - unsigned swizzle_r_x; - uint64_t modifier_r_x; - uint64_t modifier_dcc_best; - uint64_t modifier_dcc_4k; - - /* TODO: GFX11 IP HW init hasnt finish and we get zero if we read from - * adev->gfx.config.gb_addr_config_fields.num_{pkrs,pipes} */ - gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG); - ASSERT(gb_addr_config != 0); - - num_pkrs = 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS); - pkrs = ilog2(num_pkrs); - num_pipes = 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PIPES); - pipe_xor_bits = ilog2(num_pipes); - - for (i = 0; i < 2; i++) { - /* Insert the best one first. */ - /* R_X swizzle modes are the best for rendering and DCC requires them. */ - if (num_pipes > 16) - swizzle_r_x = !i ? AMD_FMT_MOD_TILE_GFX11_256K_R_X : AMD_FMT_MOD_TILE_GFX9_64K_R_X; - else - swizzle_r_x = !i ? AMD_FMT_MOD_TILE_GFX9_64K_R_X : AMD_FMT_MOD_TILE_GFX11_256K_R_X; - - modifier_r_x = AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX11) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(TILE, swizzle_r_x) | - AMD_FMT_MOD_SET(PACKERS, pkrs); - - /* DCC_CONSTANT_ENCODE is not set because it can't vary with gfx11 (it's implied to be 1). */ - modifier_dcc_best = modifier_r_x | AMD_FMT_MOD_SET(DCC, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 0) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) | - AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B); - - /* DCC settings for 4K and greater resolutions. 
(required by display hw) */ - modifier_dcc_4k = modifier_r_x | AMD_FMT_MOD_SET(DCC, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) | - AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B); - - add_modifier(mods, size, capacity, modifier_dcc_best); - add_modifier(mods, size, capacity, modifier_dcc_4k); - - add_modifier(mods, size, capacity, modifier_dcc_best | AMD_FMT_MOD_SET(DCC_RETILE, 1)); - add_modifier(mods, size, capacity, modifier_dcc_4k | AMD_FMT_MOD_SET(DCC_RETILE, 1)); - - add_modifier(mods, size, capacity, modifier_r_x); - } - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX11) | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D)); -} - -static int -get_plane_modifiers(struct amdgpu_device *adev, unsigned int plane_type, uint64_t **mods) -{ - uint64_t size = 0, capacity = 128; - *mods = NULL; - - /* We have not hooked up any pre-GFX9 modifiers. */ - if (adev->family < AMDGPU_FAMILY_AI) - return 0; - - *mods = kmalloc(capacity * sizeof(uint64_t), GFP_KERNEL); - - if (plane_type == DRM_PLANE_TYPE_CURSOR) { - add_modifier(mods, &size, &capacity, DRM_FORMAT_MOD_LINEAR); - add_modifier(mods, &size, &capacity, DRM_FORMAT_MOD_INVALID); - return *mods ? 
0 : -ENOMEM; - } - - switch (adev->family) { - case AMDGPU_FAMILY_AI: - case AMDGPU_FAMILY_RV: - add_gfx9_modifiers(adev, mods, &size, &capacity); - break; - case AMDGPU_FAMILY_NV: - case AMDGPU_FAMILY_VGH: - case AMDGPU_FAMILY_YC: - case AMDGPU_FAMILY_GC_10_3_6: - case AMDGPU_FAMILY_GC_10_3_7: - if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0)) - add_gfx10_3_modifiers(adev, mods, &size, &capacity); - else - add_gfx10_1_modifiers(adev, mods, &size, &capacity); - break; - case AMDGPU_FAMILY_GC_11_0_0: - case AMDGPU_FAMILY_GC_11_0_2: - add_gfx11_modifiers(adev, mods, &size, &capacity); - break; - } - - add_modifier(mods, &size, &capacity, DRM_FORMAT_MOD_LINEAR); - - /* INVALID marks the end of the list. */ - add_modifier(mods, &size, &capacity, DRM_FORMAT_MOD_INVALID); - - if (!*mods) - return -ENOMEM; - - return 0; -} - -static int -fill_gfx9_plane_attributes_from_modifiers(struct amdgpu_device *adev, - const struct amdgpu_framebuffer *afb, - const enum surface_pixel_format format, - const enum dc_rotation_angle rotation, - const struct plane_size *plane_size, - union dc_tiling_info *tiling_info, - struct dc_plane_dcc_param *dcc, - struct dc_plane_address *address, - const bool force_disable_dcc) -{ - const uint64_t modifier = afb->base.modifier; - int ret = 0; - - fill_gfx9_tiling_info_from_modifier(adev, tiling_info, modifier); - tiling_info->gfx9.swizzle = modifier_gfx9_swizzle_mode(modifier); - - if (modifier_has_dcc(modifier) && !force_disable_dcc) { - uint64_t dcc_address = afb->address + afb->base.offsets[1]; - bool independent_64b_blks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_64B, modifier); - bool independent_128b_blks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_128B, modifier); - - dcc->enable = 1; - dcc->meta_pitch = afb->base.pitches[1]; - dcc->independent_64b_blks = independent_64b_blks; - if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) >= AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) { - if (independent_64b_blks && independent_128b_blks) - dcc->dcc_ind_blk = 
hubp_ind_block_64b_no_128bcl; - else if (independent_128b_blks) - dcc->dcc_ind_blk = hubp_ind_block_128b; - else if (independent_64b_blks && !independent_128b_blks) - dcc->dcc_ind_blk = hubp_ind_block_64b; - else - dcc->dcc_ind_blk = hubp_ind_block_unconstrained; - } else { - if (independent_64b_blks) - dcc->dcc_ind_blk = hubp_ind_block_64b; - else - dcc->dcc_ind_blk = hubp_ind_block_unconstrained; - } - - address->grph.meta_addr.low_part = lower_32_bits(dcc_address); - address->grph.meta_addr.high_part = upper_32_bits(dcc_address); - } - - ret = validate_dcc(adev, format, rotation, tiling_info, dcc, address, plane_size); - if (ret) - drm_dbg_kms(adev_to_drm(adev), "validate_dcc: returned error: %d\n", ret); - - return ret; -} - -static int -fill_plane_buffer_attributes(struct amdgpu_device *adev, - const struct amdgpu_framebuffer *afb, - const enum surface_pixel_format format, - const enum dc_rotation_angle rotation, - const uint64_t tiling_flags, - union dc_tiling_info *tiling_info, - struct plane_size *plane_size, - struct dc_plane_dcc_param *dcc, - struct dc_plane_address *address, - bool tmz_surface, - bool force_disable_dcc) -{ - const struct drm_framebuffer *fb = &afb->base; - int ret; - - memset(tiling_info, 0, sizeof(*tiling_info)); - memset(plane_size, 0, sizeof(*plane_size)); - memset(dcc, 0, sizeof(*dcc)); - memset(address, 0, sizeof(*address)); - - address->tmz_surface = tmz_surface; - - if (format < SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) { - uint64_t addr = afb->address + fb->offsets[0]; - - plane_size->surface_size.x = 0; - plane_size->surface_size.y = 0; - plane_size->surface_size.width = fb->width; - plane_size->surface_size.height = fb->height; - plane_size->surface_pitch = - fb->pitches[0] / fb->format->cpp[0]; - - address->type = PLN_ADDR_TYPE_GRAPHICS; - address->grph.addr.low_part = lower_32_bits(addr); - address->grph.addr.high_part = upper_32_bits(addr); - } else if (format < SURFACE_PIXEL_FORMAT_INVALID) { - uint64_t luma_addr = afb->address + 
fb->offsets[0]; - uint64_t chroma_addr = afb->address + fb->offsets[1]; - - plane_size->surface_size.x = 0; - plane_size->surface_size.y = 0; - plane_size->surface_size.width = fb->width; - plane_size->surface_size.height = fb->height; - plane_size->surface_pitch = - fb->pitches[0] / fb->format->cpp[0]; - - plane_size->chroma_size.x = 0; - plane_size->chroma_size.y = 0; - /* TODO: set these based on surface format */ - plane_size->chroma_size.width = fb->width / 2; - plane_size->chroma_size.height = fb->height / 2; - - plane_size->chroma_pitch = - fb->pitches[1] / fb->format->cpp[1]; - - address->type = PLN_ADDR_TYPE_VIDEO_PROGRESSIVE; - address->video_progressive.luma_addr.low_part = - lower_32_bits(luma_addr); - address->video_progressive.luma_addr.high_part = - upper_32_bits(luma_addr); - address->video_progressive.chroma_addr.low_part = - lower_32_bits(chroma_addr); - address->video_progressive.chroma_addr.high_part = - upper_32_bits(chroma_addr); - } - - if (adev->family >= AMDGPU_FAMILY_AI) { - ret = fill_gfx9_plane_attributes_from_modifiers(adev, afb, format, - rotation, plane_size, - tiling_info, dcc, - address, - force_disable_dcc); - if (ret) - return ret; - } else { - fill_gfx8_tiling_info_from_flags(tiling_info, tiling_flags); - } - - return 0; -} - -static void -fill_blending_from_plane_state(const struct drm_plane_state *plane_state, - bool *per_pixel_alpha, bool *pre_multiplied_alpha, - bool *global_alpha, int *global_alpha_value) -{ - *per_pixel_alpha = false; - *pre_multiplied_alpha = true; - *global_alpha = false; - *global_alpha_value = 0xff; - - if (plane_state->plane->type != DRM_PLANE_TYPE_OVERLAY) - return; - - if (plane_state->pixel_blend_mode == DRM_MODE_BLEND_PREMULTI || - plane_state->pixel_blend_mode == DRM_MODE_BLEND_COVERAGE) { - static const uint32_t alpha_formats[] = { - DRM_FORMAT_ARGB8888, - DRM_FORMAT_RGBA8888, - DRM_FORMAT_ABGR8888, - }; - uint32_t format = plane_state->fb->format->format; - unsigned int i; - - for (i = 0; i < 
ARRAY_SIZE(alpha_formats); ++i) { - if (format == alpha_formats[i]) { - *per_pixel_alpha = true; - break; - } - } - - if (*per_pixel_alpha && plane_state->pixel_blend_mode == DRM_MODE_BLEND_COVERAGE) - *pre_multiplied_alpha = false; - } - - if (plane_state->alpha < 0xffff) { - *global_alpha = true; - *global_alpha_value = plane_state->alpha >> 8; - } -} - -static int -fill_plane_color_attributes(const struct drm_plane_state *plane_state, - const enum surface_pixel_format format, - enum dc_color_space *color_space) -{ - bool full_range; - - *color_space = COLOR_SPACE_SRGB; - - /* DRM color properties only affect non-RGB formats. */ - if (format < SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) - return 0; - - full_range = (plane_state->color_range == DRM_COLOR_YCBCR_FULL_RANGE); - - switch (plane_state->color_encoding) { - case DRM_COLOR_YCBCR_BT601: - if (full_range) - *color_space = COLOR_SPACE_YCBCR601; - else - *color_space = COLOR_SPACE_YCBCR601_LIMITED; - break; - - case DRM_COLOR_YCBCR_BT709: - if (full_range) - *color_space = COLOR_SPACE_YCBCR709; - else - *color_space = COLOR_SPACE_YCBCR709_LIMITED; - break; - - case DRM_COLOR_YCBCR_BT2020: - if (full_range) - *color_space = COLOR_SPACE_2020_YCBCR; - else - return -EINVAL; - break; - - default: - return -EINVAL; - } - - return 0; -} - -static int -fill_dc_plane_info_and_addr(struct amdgpu_device *adev, - const struct drm_plane_state *plane_state, - const uint64_t tiling_flags, - struct dc_plane_info *plane_info, - struct dc_plane_address *address, - bool tmz_surface, - bool force_disable_dcc) -{ - const struct drm_framebuffer *fb = plane_state->fb; - const struct amdgpu_framebuffer *afb = - to_amdgpu_framebuffer(plane_state->fb); - int ret; - - memset(plane_info, 0, sizeof(*plane_info)); - - switch (fb->format->format) { - case DRM_FORMAT_C8: - plane_info->format = - SURFACE_PIXEL_FORMAT_GRPH_PALETA_256_COLORS; - break; - case DRM_FORMAT_RGB565: - plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_RGB565; - break; - case 
DRM_FORMAT_XRGB8888: - case DRM_FORMAT_ARGB8888: - plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ARGB8888; - break; - case DRM_FORMAT_XRGB2101010: - case DRM_FORMAT_ARGB2101010: - plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ARGB2101010; - break; - case DRM_FORMAT_XBGR2101010: - case DRM_FORMAT_ABGR2101010: - plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010; - break; - case DRM_FORMAT_XBGR8888: - case DRM_FORMAT_ABGR8888: - plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ABGR8888; - break; - case DRM_FORMAT_NV21: - plane_info->format = SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr; - break; - case DRM_FORMAT_NV12: - plane_info->format = SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb; - break; - case DRM_FORMAT_P010: - plane_info->format = SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb; - break; - case DRM_FORMAT_XRGB16161616F: - case DRM_FORMAT_ARGB16161616F: - plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F; - break; - case DRM_FORMAT_XBGR16161616F: - case DRM_FORMAT_ABGR16161616F: - plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F; - break; - case DRM_FORMAT_XRGB16161616: - case DRM_FORMAT_ARGB16161616: - plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616; - break; - case DRM_FORMAT_XBGR16161616: - case DRM_FORMAT_ABGR16161616: - plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616; - break; - default: - DRM_ERROR( - "Unsupported screen format %p4cc\n", - &fb->format->format); - return -EINVAL; - } - - switch (plane_state->rotation & DRM_MODE_ROTATE_MASK) { - case DRM_MODE_ROTATE_0: - plane_info->rotation = ROTATION_ANGLE_0; - break; - case DRM_MODE_ROTATE_90: - plane_info->rotation = ROTATION_ANGLE_90; - break; - case DRM_MODE_ROTATE_180: - plane_info->rotation = ROTATION_ANGLE_180; - break; - case DRM_MODE_ROTATE_270: - plane_info->rotation = ROTATION_ANGLE_270; - break; - default: - plane_info->rotation = ROTATION_ANGLE_0; - break; - } - - plane_info->visible = true; - plane_info->stereo_format = PLANE_STEREO_FORMAT_NONE; - - 
plane_info->layer_index = 0; - - ret = fill_plane_color_attributes(plane_state, plane_info->format, - &plane_info->color_space); - if (ret) - return ret; - - ret = fill_plane_buffer_attributes(adev, afb, plane_info->format, - plane_info->rotation, tiling_flags, - &plane_info->tiling_info, - &plane_info->plane_size, - &plane_info->dcc, address, tmz_surface, - force_disable_dcc); - if (ret) - return ret; - - fill_blending_from_plane_state( - plane_state, &plane_info->per_pixel_alpha, &plane_info->pre_multiplied_alpha, - &plane_info->global_alpha, &plane_info->global_alpha_value); - - return 0; -} - -static int fill_dc_plane_attributes(struct amdgpu_device *adev, - struct dc_plane_state *dc_plane_state, - struct drm_plane_state *plane_state, - struct drm_crtc_state *crtc_state) -{ - struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(crtc_state); - struct amdgpu_framebuffer *afb = (struct amdgpu_framebuffer *)plane_state->fb; - struct dc_scaling_info scaling_info; - struct dc_plane_info plane_info; - int ret; - bool force_disable_dcc = false; - - ret = fill_dc_scaling_info(adev, plane_state, &scaling_info); - if (ret) - return ret; - - dc_plane_state->src_rect = scaling_info.src_rect; - dc_plane_state->dst_rect = scaling_info.dst_rect; - dc_plane_state->clip_rect = scaling_info.clip_rect; - dc_plane_state->scaling_quality = scaling_info.scaling_quality; - - force_disable_dcc = adev->asic_type == CHIP_RAVEN && adev->in_suspend; - ret = fill_dc_plane_info_and_addr(adev, plane_state, - afb->tiling_flags, - &plane_info, - &dc_plane_state->address, - afb->tmz_surface, - force_disable_dcc); - if (ret) - return ret; - - dc_plane_state->format = plane_info.format; - dc_plane_state->color_space = plane_info.color_space; - dc_plane_state->format = plane_info.format; - dc_plane_state->plane_size = plane_info.plane_size; - dc_plane_state->rotation = plane_info.rotation; - dc_plane_state->horizontal_mirror = plane_info.horizontal_mirror; - dc_plane_state->stereo_format = 
plane_info.stereo_format; - dc_plane_state->tiling_info = plane_info.tiling_info; - dc_plane_state->visible = plane_info.visible; - dc_plane_state->per_pixel_alpha = plane_info.per_pixel_alpha; - dc_plane_state->pre_multiplied_alpha = plane_info.pre_multiplied_alpha; - dc_plane_state->global_alpha = plane_info.global_alpha; - dc_plane_state->global_alpha_value = plane_info.global_alpha_value; - dc_plane_state->dcc = plane_info.dcc; - dc_plane_state->layer_index = plane_info.layer_index; // Always returns 0 - dc_plane_state->flip_int_enabled = true; - - /* - * Always set input transfer function, since plane state is refreshed - * every time. - */ - ret = amdgpu_dm_update_plane_color_mgmt(dm_crtc_state, dc_plane_state); - if (ret) - return ret; - - return 0; -} - -/** - * fill_dc_dirty_rects() - Fill DC dirty regions for PSR selective updates - * - * @plane: DRM plane containing dirty regions that need to be flushed to the eDP - * remote fb - * @old_plane_state: Old state of @plane - * @new_plane_state: New state of @plane - * @crtc_state: New state of CRTC connected to the @plane - * @flip_addrs: DC flip tracking struct, which also tracts dirty rects - * - * For PSR SU, DC informs the DMUB uController of dirty rectangle regions - * (referred to as "damage clips" in DRM nomenclature) that require updating on - * the eDP remote buffer. The responsibility of specifying the dirty regions is - * amdgpu_dm's. - * - * A damage-aware DRM client should fill the FB_DAMAGE_CLIPS property on the - * plane with regions that require flushing to the eDP remote buffer. In - * addition, certain use cases - such as cursor and multi-plane overlay (MPO) - - * implicitly provide damage clips without any client support via the plane - * bounds. - * - * Today, amdgpu_dm only supports the MPO and cursor usecase. 
- * - * TODO: Also enable for FB_DAMAGE_CLIPS - */ -static void fill_dc_dirty_rects(struct drm_plane *plane, - struct drm_plane_state *old_plane_state, - struct drm_plane_state *new_plane_state, - struct drm_crtc_state *crtc_state, - struct dc_flip_addrs *flip_addrs) -{ - struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(crtc_state); - struct rect *dirty_rects = flip_addrs->dirty_rects; - uint32_t num_clips; - bool bb_changed; - bool fb_changed; - uint32_t i = 0; - - flip_addrs->dirty_rect_count = 0; + flip_addrs->dirty_rect_count = 0; /* * Cursor plane has it's own dirty rect update interface. See @@ -6168,7 +5277,7 @@ static void fill_stream_properties_from_drm_display_mode( timing_out->scan_type = SCANNING_TYPE_NODATA; timing_out->hdmi_vic = 0; - if(old_stream) { + if (old_stream) { timing_out->vic = old_stream->timing.vic; timing_out->flags.HSYNC_POSITIVE_POLARITY = old_stream->timing.flags.HSYNC_POSITIVE_POLARITY; timing_out->flags.VSYNC_POSITIVE_POLARITY = old_stream->timing.flags.VSYNC_POSITIVE_POLARITY; @@ -6390,16 +5499,126 @@ static void dm_enable_per_frame_crtc_master_sync(struct dc_state *context) } } +/** + * DOC: FreeSync Video + * + * When a userspace application wants to play a video, the content follows a + * standard format definition that usually specifies the FPS for that format. + * The below list illustrates some video format and the expected FPS, + * respectively: + * + * - TV/NTSC (23.976 FPS) + * - Cinema (24 FPS) + * - TV/PAL (25 FPS) + * - TV/NTSC (29.97 FPS) + * - TV/NTSC (30 FPS) + * - Cinema HFR (48 FPS) + * - TV/PAL (50 FPS) + * - Commonly used (60 FPS) + * - Multiples of 24 (48,72,96 FPS) + * + * The list of standards video format is not huge and can be added to the + * connector modeset list beforehand. With that, userspace can leverage + * FreeSync to extends the front porch in order to attain the target refresh + * rate. 
Such a switch will happen seamlessly, without screen blanking or + * reprogramming of the output in any other way. If the userspace requests a + * modesetting change compatible with FreeSync modes that only differ in the + * refresh rate, DC will skip the full update and avoid blink during the + * transition. For example, the video player can change the modesetting from + * 60Hz to 30Hz for playing TV/NTSC content when it goes full screen without + * causing any display blink. This same concept can be applied to a mode + * setting change. + */ +static struct drm_display_mode * +get_highest_refresh_rate_mode(struct amdgpu_dm_connector *aconnector, + bool use_probed_modes) +{ + struct drm_display_mode *m, *m_pref = NULL; + u16 current_refresh, highest_refresh; + struct list_head *list_head = use_probed_modes ? + &aconnector->base.probed_modes : + &aconnector->base.modes; + + if (aconnector->freesync_vid_base.clock != 0) + return &aconnector->freesync_vid_base; + + /* Find the preferred mode */ + list_for_each_entry (m, list_head, head) { + if (m->type & DRM_MODE_TYPE_PREFERRED) { + m_pref = m; + break; + } + } + + if (!m_pref) { + /* Probably an EDID with no preferred mode. Fallback to first entry */ + m_pref = list_first_entry_or_null( + &aconnector->base.modes, struct drm_display_mode, head); + if (!m_pref) { + DRM_DEBUG_DRIVER("No preferred mode found in EDID\n"); + return NULL; + } + } + + highest_refresh = drm_mode_vrefresh(m_pref); + + /* + * Find the mode with highest refresh rate with same resolution. + * For some monitors, preferred mode is not the mode with highest + * supported refresh rate. 
+ */ + list_for_each_entry (m, list_head, head) { + current_refresh = drm_mode_vrefresh(m); + + if (m->hdisplay == m_pref->hdisplay && + m->vdisplay == m_pref->vdisplay && + highest_refresh < current_refresh) { + highest_refresh = current_refresh; + m_pref = m; + } + } + + drm_mode_copy(&aconnector->freesync_vid_base, m_pref); + return m_pref; +} + +static bool is_freesync_video_mode(const struct drm_display_mode *mode, + struct amdgpu_dm_connector *aconnector) +{ + struct drm_display_mode *high_mode; + int timing_diff; + + high_mode = get_highest_refresh_rate_mode(aconnector, false); + if (!high_mode || !mode) + return false; + + timing_diff = high_mode->vtotal - mode->vtotal; + + if (high_mode->clock == 0 || high_mode->clock != mode->clock || + high_mode->hdisplay != mode->hdisplay || + high_mode->vdisplay != mode->vdisplay || + high_mode->hsync_start != mode->hsync_start || + high_mode->hsync_end != mode->hsync_end || + high_mode->htotal != mode->htotal || + high_mode->hskew != mode->hskew || + high_mode->vscan != mode->vscan || + high_mode->vsync_start - mode->vsync_start != timing_diff || + high_mode->vsync_end - mode->vsync_end != timing_diff) + return false; + else + return true; +} + #if defined(CONFIG_DRM_AMD_DC_DCN) static void update_dsc_caps(struct amdgpu_dm_connector *aconnector, - struct dc_sink *sink, struct dc_stream_state *stream, - struct dsc_dec_dpcd_caps *dsc_caps) + struct dc_sink *sink, struct dc_stream_state *stream, + struct dsc_dec_dpcd_caps *dsc_caps) { stream->timing.flags.DSC = 0; dsc_caps->is_dsc_supported = false; if (aconnector->dc_link && (sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT || - sink->sink_signal == SIGNAL_TYPE_EDP)) { + sink->sink_signal == SIGNAL_TYPE_EDP)) { if (sink->link->dpcd_caps.dongle_type == DISPLAY_DONGLE_NONE || sink->link->dpcd_caps.dongle_type == DISPLAY_DONGLE_DP_HDMI_CONVERTER) dc_dsc_parse_dsc_dpcd(aconnector->dc_link->ctx->dc, @@ -6409,6 +5628,7 @@ static void update_dsc_caps(struct amdgpu_dm_connector 
*aconnector, } } + static void apply_dsc_policy_for_edp(struct amdgpu_dm_connector *aconnector, struct dc_sink *sink, struct dc_stream_state *stream, struct dsc_dec_dpcd_caps *dsc_caps, @@ -6467,9 +5687,10 @@ static void apply_dsc_policy_for_edp(struct amdgpu_dm_connector *aconnector, } } + static void apply_dsc_policy_for_stream(struct amdgpu_dm_connector *aconnector, - struct dc_sink *sink, struct dc_stream_state *stream, - struct dsc_dec_dpcd_caps *dsc_caps) + struct dc_sink *sink, struct dc_stream_state *stream, + struct dsc_dec_dpcd_caps *dsc_caps) { struct drm_connector *drm_connector = &aconnector->base; uint32_t link_bandwidth_kbps; @@ -6480,7 +5701,6 @@ static void apply_dsc_policy_for_stream(struct amdgpu_dm_connector *aconnector, link_bandwidth_kbps = dc_link_bandwidth_kbps(aconnector->dc_link, dc_link_get_link_cap(aconnector->dc_link)); - if (stream->link && stream->link->local_sink) max_dsc_target_bpp_limit_override = stream->link->local_sink->edid_caps.panel_patch.max_dsc_target_bpp_limit; @@ -6504,8 +5724,7 @@ static void apply_dsc_policy_for_stream(struct amdgpu_dm_connector *aconnector, &stream->timing, &stream->timing.dsc_cfg)) { stream->timing.flags.DSC = 1; - DRM_DEBUG_DRIVER("%s: [%s] DSC is selected from SST RX\n", - __func__, drm_connector->name); + DRM_DEBUG_DRIVER("%s: [%s] DSC is selected from SST RX\n", __func__, drm_connector->name); } } else if (sink->link->dpcd_caps.dongle_type == DISPLAY_DONGLE_DP_HDMI_CONVERTER) { timing_bw_in_kbps = dc_bandwidth_in_kbps_from_timing(&stream->timing); @@ -6536,123 +5755,13 @@ static void apply_dsc_policy_for_stream(struct amdgpu_dm_connector *aconnector, if (stream->timing.flags.DSC && aconnector->dsc_settings.dsc_num_slices_h) stream->timing.dsc_cfg.num_slices_h = aconnector->dsc_settings.dsc_num_slices_h; - if (stream->timing.flags.DSC && aconnector->dsc_settings.dsc_num_slices_v) - stream->timing.dsc_cfg.num_slices_v = aconnector->dsc_settings.dsc_num_slices_v; - - if (stream->timing.flags.DSC && 
aconnector->dsc_settings.dsc_bits_per_pixel) - stream->timing.dsc_cfg.bits_per_pixel = aconnector->dsc_settings.dsc_bits_per_pixel; -} -#endif /* CONFIG_DRM_AMD_DC_DCN */ - -/** - * DOC: FreeSync Video - * - * When a userspace application wants to play a video, the content follows a - * standard format definition that usually specifies the FPS for that format. - * The below list illustrates some video format and the expected FPS, - * respectively: - * - * - TV/NTSC (23.976 FPS) - * - Cinema (24 FPS) - * - TV/PAL (25 FPS) - * - TV/NTSC (29.97 FPS) - * - TV/NTSC (30 FPS) - * - Cinema HFR (48 FPS) - * - TV/PAL (50 FPS) - * - Commonly used (60 FPS) - * - Multiples of 24 (48,72,96,120 FPS) - * - * The list of standards video format is not huge and can be added to the - * connector modeset list beforehand. With that, userspace can leverage - * FreeSync to extends the front porch in order to attain the target refresh - * rate. Such a switch will happen seamlessly, without screen blanking or - * reprogramming of the output in any other way. If the userspace requests a - * modesetting change compatible with FreeSync modes that only differ in the - * refresh rate, DC will skip the full update and avoid blink during the - * transition. For example, the video player can change the modesetting from - * 60Hz to 30Hz for playing TV/NTSC content when it goes full screen without - * causing any display blink. This same concept can be applied to a mode - * setting change. - */ -static struct drm_display_mode * -get_highest_refresh_rate_mode(struct amdgpu_dm_connector *aconnector, - bool use_probed_modes) -{ - struct drm_display_mode *m, *m_pref = NULL; - u16 current_refresh, highest_refresh; - struct list_head *list_head = use_probed_modes ? 
- &aconnector->base.probed_modes : - &aconnector->base.modes; - - if (aconnector->freesync_vid_base.clock != 0) - return &aconnector->freesync_vid_base; - - /* Find the preferred mode */ - list_for_each_entry (m, list_head, head) { - if (m->type & DRM_MODE_TYPE_PREFERRED) { - m_pref = m; - break; - } - } - - if (!m_pref) { - /* Probably an EDID with no preferred mode. Fallback to first entry */ - m_pref = list_first_entry_or_null( - &aconnector->base.modes, struct drm_display_mode, head); - if (!m_pref) { - DRM_DEBUG_DRIVER("No preferred mode found in EDID\n"); - return NULL; - } - } - - highest_refresh = drm_mode_vrefresh(m_pref); - - /* - * Find the mode with highest refresh rate with same resolution. - * For some monitors, preferred mode is not the mode with highest - * supported refresh rate. - */ - list_for_each_entry (m, list_head, head) { - current_refresh = drm_mode_vrefresh(m); - - if (m->hdisplay == m_pref->hdisplay && - m->vdisplay == m_pref->vdisplay && - highest_refresh < current_refresh) { - highest_refresh = current_refresh; - m_pref = m; - } - } - - drm_mode_copy(&aconnector->freesync_vid_base, m_pref); - return m_pref; -} - -static bool is_freesync_video_mode(const struct drm_display_mode *mode, - struct amdgpu_dm_connector *aconnector) -{ - struct drm_display_mode *high_mode; - int timing_diff; - - high_mode = get_highest_refresh_rate_mode(aconnector, false); - if (!high_mode || !mode) - return false; - - timing_diff = high_mode->vtotal - mode->vtotal; + if (stream->timing.flags.DSC && aconnector->dsc_settings.dsc_num_slices_v) + stream->timing.dsc_cfg.num_slices_v = aconnector->dsc_settings.dsc_num_slices_v; - if (high_mode->clock == 0 || high_mode->clock != mode->clock || - high_mode->hdisplay != mode->hdisplay || - high_mode->vdisplay != mode->vdisplay || - high_mode->hsync_start != mode->hsync_start || - high_mode->hsync_end != mode->hsync_end || - high_mode->htotal != mode->htotal || - high_mode->hskew != mode->hskew || - high_mode->vscan != 
mode->vscan || - high_mode->vsync_start - mode->vsync_start != timing_diff || - high_mode->vsync_end - mode->vsync_end != timing_diff) - return false; - else - return true; + if (stream->timing.flags.DSC && aconnector->dsc_settings.dsc_bits_per_pixel) + stream->timing.dsc_cfg.bits_per_pixel = aconnector->dsc_settings.dsc_bits_per_pixel; } +#endif /* CONFIG_DRM_AMD_DC_DCN */ static struct dc_stream_state * create_stream_for_sink(struct amdgpu_dm_connector *aconnector, @@ -6677,6 +5786,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, #if defined(CONFIG_DRM_AMD_DC_DCN) struct dsc_dec_dpcd_caps dsc_caps; #endif + struct dc_sink *sink = NULL; memset(&saved_mode, 0, sizeof(saved_mode)); @@ -6740,7 +5850,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, drm_mode_copy(&mode, freesync_mode); } else { decide_crtc_timing_for_drm_display_mode( - &mode, preferred_mode, scale); + &mode, preferred_mode, scale); preferred_refresh = drm_mode_vrefresh(preferred_mode); } @@ -6751,7 +5861,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, else if (!dm_state) drm_mode_set_crtcinfo(&mode, 0); - /* + /* * If scaling is enabled and refresh rate didn't change * we copy the vic and polarities of the old timings */ @@ -6999,7 +6109,8 @@ amdgpu_dm_connector_detect(struct drm_connector *connector, bool force) !aconnector->fake_enable) connected = (aconnector->dc_sink != NULL); else - connected = (aconnector->base.force == DRM_FORCE_ON); + connected = (aconnector->base.force == DRM_FORCE_ON || + aconnector->base.force == DRM_FORCE_ON_DIGITAL); update_subconnector_property(aconnector); @@ -7123,18 +6234,21 @@ static void amdgpu_dm_connector_destroy(struct drm_connector *connector) int i; /* - * Call only if mst_mgr was iniitalized before since it's not done + * Call only if mst_mgr was initialized before since it's not done * for all connector types. 
*/ if (aconnector->mst_mgr.dev) drm_dp_mst_topology_mgr_destroy(&aconnector->mst_mgr); +#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) ||\ + defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE) for (i = 0; i < dm->num_of_edps; i++) { if ((link == dm->backlight_link[i]) && dm->backlight_dev[i]) { backlight_device_unregister(dm->backlight_dev[i]); dm->backlight_dev[i] = NULL; } } +#endif if (aconnector->dc_em_sink) dc_sink_release(aconnector->dc_em_sink); @@ -7175,6 +6289,7 @@ void amdgpu_dm_connector_funcs_reset(struct drm_connector *connector) state->base.max_requested_bpc = 8; state->vcpi_slots = 0; state->pbn = 0; + if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) state->abm_level = amdgpu_dm_abm_level; @@ -7564,10 +6679,10 @@ static void dm_update_crtc_active_planes(struct drm_crtc *crtc, } static int dm_crtc_helper_atomic_check(struct drm_crtc *crtc, - struct drm_atomic_state *state) + struct drm_atomic_state *state) { struct drm_crtc_state *crtc_state = drm_atomic_get_new_crtc_state(state, - crtc); + crtc); struct amdgpu_device *adev = drm_to_adev(crtc->dev); struct dc *dc = adev->dm.dc; struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(crtc_state); @@ -7578,7 +6693,7 @@ static int dm_crtc_helper_atomic_check(struct drm_crtc *crtc, dm_update_crtc_active_planes(crtc, crtc_state); if (WARN_ON(unlikely(!dm_crtc_state->stream && - modeset_required(crtc_state, NULL, dm_crtc_state->stream)))) { + modeset_required(crtc_state, NULL, dm_crtc_state->stream)))) { return ret; } @@ -7589,7 +6704,7 @@ static int dm_crtc_helper_atomic_check(struct drm_crtc *crtc, * userspace which stops using the HW cursor altogether in response to the resulting EINVAL. 
*/ if (crtc_state->enable && - !(crtc_state->plane_mask & drm_plane_mask(crtc->primary))) { + !(crtc_state->plane_mask & drm_plane_mask(crtc->primary))) { DRM_DEBUG_ATOMIC("Can't enable a CRTC without enabling the primary plane\n"); return -EINVAL; } @@ -7627,21 +6742,21 @@ static void dm_encoder_helper_disable(struct drm_encoder *encoder) int convert_dc_color_depth_into_bpc(enum dc_color_depth display_color_depth) { switch (display_color_depth) { - case COLOR_DEPTH_666: - return 6; - case COLOR_DEPTH_888: - return 8; - case COLOR_DEPTH_101010: - return 10; - case COLOR_DEPTH_121212: - return 12; - case COLOR_DEPTH_141414: - return 14; - case COLOR_DEPTH_161616: - return 16; - default: - break; - } + case COLOR_DEPTH_666: + return 6; + case COLOR_DEPTH_888: + return 8; + case COLOR_DEPTH_101010: + return 10; + case COLOR_DEPTH_121212: + return 12; + case COLOR_DEPTH_141414: + return 14; + case COLOR_DEPTH_161616: + return 16; + default: + break; + } return 0; } @@ -7672,7 +6787,7 @@ static int dm_encoder_helper_atomic_check(struct drm_encoder *encoder, if (!state->duplicated) { int max_bpc = conn_state->max_requested_bpc; is_y420 = drm_mode_is_420_also(&connector->display_info, adjusted_mode) && - aconnector->force_yuv420_output; + aconnector->force_yuv420_output; color_depth = convert_color_depth_from_display_info(connector, is_y420, max_bpc); @@ -7727,7 +6842,7 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state, if (!stream) continue; - if ((struct amdgpu_dm_connector*)stream->dm_stream_context == aconnector) + if ((struct amdgpu_dm_connector *)stream->dm_stream_context == aconnector) break; stream = NULL; @@ -7776,475 +6891,6 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state, } #endif -static void dm_drm_plane_reset(struct drm_plane *plane) -{ - struct dm_plane_state *amdgpu_state = NULL; - - if (plane->state) - plane->funcs->atomic_destroy_state(plane, plane->state); - - amdgpu_state = 
kzalloc(sizeof(*amdgpu_state), GFP_KERNEL); - WARN_ON(amdgpu_state == NULL); - - if (amdgpu_state) - __drm_atomic_helper_plane_reset(plane, &amdgpu_state->base); -} - -static struct drm_plane_state * -dm_drm_plane_duplicate_state(struct drm_plane *plane) -{ - struct dm_plane_state *dm_plane_state, *old_dm_plane_state; - - old_dm_plane_state = to_dm_plane_state(plane->state); - dm_plane_state = kzalloc(sizeof(*dm_plane_state), GFP_KERNEL); - if (!dm_plane_state) - return NULL; - - __drm_atomic_helper_plane_duplicate_state(plane, &dm_plane_state->base); - - if (old_dm_plane_state->dc_state) { - dm_plane_state->dc_state = old_dm_plane_state->dc_state; - dc_plane_state_retain(dm_plane_state->dc_state); - } - - return &dm_plane_state->base; -} - -static void dm_drm_plane_destroy_state(struct drm_plane *plane, - struct drm_plane_state *state) -{ - struct dm_plane_state *dm_plane_state = to_dm_plane_state(state); - - if (dm_plane_state->dc_state) - dc_plane_state_release(dm_plane_state->dc_state); - - drm_atomic_helper_plane_destroy_state(plane, state); -} - -static const struct drm_plane_funcs dm_plane_funcs = { - .update_plane = drm_atomic_helper_update_plane, - .disable_plane = drm_atomic_helper_disable_plane, - .destroy = drm_primary_helper_destroy, - .reset = dm_drm_plane_reset, - .atomic_duplicate_state = dm_drm_plane_duplicate_state, - .atomic_destroy_state = dm_drm_plane_destroy_state, - .format_mod_supported = dm_plane_format_mod_supported, -}; - -static int dm_plane_helper_prepare_fb(struct drm_plane *plane, - struct drm_plane_state *new_state) -{ - struct amdgpu_framebuffer *afb; - struct drm_gem_object *obj; - struct amdgpu_device *adev; - struct amdgpu_bo *rbo; - struct dm_plane_state *dm_plane_state_new, *dm_plane_state_old; - uint32_t domain; - int r; - - if (!new_state->fb) { - DRM_DEBUG_KMS("No FB bound\n"); - return 0; - } - - afb = to_amdgpu_framebuffer(new_state->fb); - obj = new_state->fb->obj[0]; - rbo = gem_to_amdgpu_bo(obj); - adev = 
amdgpu_ttm_adev(rbo->tbo.bdev); - - r = amdgpu_bo_reserve(rbo, true); - if (r) { - dev_err(adev->dev, "fail to reserve bo (%d)\n", r); - return r; - } - - r = dma_resv_reserve_fences(rbo->tbo.base.resv, 1); - if (r) { - dev_err(adev->dev, "reserving fence slot failed (%d)\n", r); - goto error_unlock; - } - - if (plane->type != DRM_PLANE_TYPE_CURSOR) - domain = amdgpu_display_supported_domains(adev, rbo->flags); - else - domain = AMDGPU_GEM_DOMAIN_VRAM; - - r = amdgpu_bo_pin(rbo, domain); - if (unlikely(r != 0)) { - if (r != -ERESTARTSYS) - DRM_ERROR("Failed to pin framebuffer with error %d\n", r); - goto error_unlock; - } - - r = amdgpu_ttm_alloc_gart(&rbo->tbo); - if (unlikely(r != 0)) { - DRM_ERROR("%p bind failed\n", rbo); - goto error_unpin; - } - - r = drm_gem_plane_helper_prepare_fb(plane, new_state); - if (unlikely(r != 0)) - goto error_unpin; - - amdgpu_bo_unreserve(rbo); - - afb->address = amdgpu_bo_gpu_offset(rbo); - - amdgpu_bo_ref(rbo); - - /** - * We don't do surface updates on planes that have been newly created, - * but we also don't have the afb->address during atomic check. - * - * Fill in buffer attributes depending on the address here, but only on - * newly created planes since they're not being used by DC yet and this - * won't modify global state. 
- */ - dm_plane_state_old = to_dm_plane_state(plane->state); - dm_plane_state_new = to_dm_plane_state(new_state); - - if (dm_plane_state_new->dc_state && - dm_plane_state_old->dc_state != dm_plane_state_new->dc_state) { - struct dc_plane_state *plane_state = - dm_plane_state_new->dc_state; - bool force_disable_dcc = !plane_state->dcc.enable; - - fill_plane_buffer_attributes( - adev, afb, plane_state->format, plane_state->rotation, - afb->tiling_flags, - &plane_state->tiling_info, &plane_state->plane_size, - &plane_state->dcc, &plane_state->address, - afb->tmz_surface, force_disable_dcc); - } - - return 0; - -error_unpin: - amdgpu_bo_unpin(rbo); - -error_unlock: - amdgpu_bo_unreserve(rbo); - return r; -} - -static void dm_plane_helper_cleanup_fb(struct drm_plane *plane, - struct drm_plane_state *old_state) -{ - struct amdgpu_bo *rbo; - int r; - - if (!old_state->fb) - return; - - rbo = gem_to_amdgpu_bo(old_state->fb->obj[0]); - r = amdgpu_bo_reserve(rbo, false); - if (unlikely(r)) { - DRM_ERROR("failed to reserve rbo before unpin\n"); - return; - } - - amdgpu_bo_unpin(rbo); - amdgpu_bo_unreserve(rbo); - amdgpu_bo_unref(&rbo); -} - -static int dm_plane_helper_check_state(struct drm_plane_state *state, - struct drm_crtc_state *new_crtc_state) -{ - struct drm_framebuffer *fb = state->fb; - int min_downscale, max_upscale; - int min_scale = 0; - int max_scale = INT_MAX; - - /* Plane enabled? Validate viewport and get scaling factors from plane caps. 
*/ - if (fb && state->crtc) { - /* Validate viewport to cover the case when only the position changes */ - if (state->plane->type != DRM_PLANE_TYPE_CURSOR) { - int viewport_width = state->crtc_w; - int viewport_height = state->crtc_h; - - if (state->crtc_x < 0) - viewport_width += state->crtc_x; - else if (state->crtc_x + state->crtc_w > new_crtc_state->mode.crtc_hdisplay) - viewport_width = new_crtc_state->mode.crtc_hdisplay - state->crtc_x; - - if (state->crtc_y < 0) - viewport_height += state->crtc_y; - else if (state->crtc_y + state->crtc_h > new_crtc_state->mode.crtc_vdisplay) - viewport_height = new_crtc_state->mode.crtc_vdisplay - state->crtc_y; - - if (viewport_width < 0 || viewport_height < 0) { - DRM_DEBUG_ATOMIC("Plane completely outside of screen\n"); - return -EINVAL; - } else if (viewport_width < MIN_VIEWPORT_SIZE*2) { /* x2 for width is because of pipe-split. */ - DRM_DEBUG_ATOMIC("Viewport width %d smaller than %d\n", viewport_width, MIN_VIEWPORT_SIZE*2); - return -EINVAL; - } else if (viewport_height < MIN_VIEWPORT_SIZE) { - DRM_DEBUG_ATOMIC("Viewport height %d smaller than %d\n", viewport_height, MIN_VIEWPORT_SIZE); - return -EINVAL; - } - - } - - /* Get min/max allowed scaling factors from plane caps. */ - get_min_max_dc_plane_scaling(state->crtc->dev, fb, - &min_downscale, &max_upscale); - /* - * Convert to drm convention: 16.16 fixed point, instead of dc's - * 1.0 == 1000. Also drm scaling is src/dst instead of dc's - * dst/src, so min_scale = 1.0 / max_upscale, etc. 
- */ - min_scale = (1000 << 16) / max_upscale; - max_scale = (1000 << 16) / min_downscale; - } - - return drm_atomic_helper_check_plane_state( - state, new_crtc_state, min_scale, max_scale, true, true); -} - -static int dm_plane_atomic_check(struct drm_plane *plane, - struct drm_atomic_state *state) -{ - struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, - plane); - struct amdgpu_device *adev = drm_to_adev(plane->dev); - struct dc *dc = adev->dm.dc; - struct dm_plane_state *dm_plane_state; - struct dc_scaling_info scaling_info; - struct drm_crtc_state *new_crtc_state; - int ret; - - trace_amdgpu_dm_plane_atomic_check(new_plane_state); - - dm_plane_state = to_dm_plane_state(new_plane_state); - - if (!dm_plane_state->dc_state) - return 0; - - new_crtc_state = - drm_atomic_get_new_crtc_state(state, - new_plane_state->crtc); - if (!new_crtc_state) - return -EINVAL; - - ret = dm_plane_helper_check_state(new_plane_state, new_crtc_state); - if (ret) - return ret; - - ret = fill_dc_scaling_info(adev, new_plane_state, &scaling_info); - if (ret) - return ret; - - if (dc_validate_plane(dc, dm_plane_state->dc_state) == DC_OK) - return 0; - - return -EINVAL; -} - -static int dm_plane_atomic_async_check(struct drm_plane *plane, - struct drm_atomic_state *state) -{ - /* Only support async updates on cursor planes. 
*/ - if (plane->type != DRM_PLANE_TYPE_CURSOR) - return -EINVAL; - - return 0; -} - -static void dm_plane_atomic_async_update(struct drm_plane *plane, - struct drm_atomic_state *state) -{ - struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, - plane); - struct drm_plane_state *old_state = - drm_atomic_get_old_plane_state(state, plane); - - trace_amdgpu_dm_atomic_update_cursor(new_state); - - swap(plane->state->fb, new_state->fb); - - plane->state->src_x = new_state->src_x; - plane->state->src_y = new_state->src_y; - plane->state->src_w = new_state->src_w; - plane->state->src_h = new_state->src_h; - plane->state->crtc_x = new_state->crtc_x; - plane->state->crtc_y = new_state->crtc_y; - plane->state->crtc_w = new_state->crtc_w; - plane->state->crtc_h = new_state->crtc_h; - - handle_cursor_update(plane, old_state); -} - -static const struct drm_plane_helper_funcs dm_plane_helper_funcs = { - .prepare_fb = dm_plane_helper_prepare_fb, - .cleanup_fb = dm_plane_helper_cleanup_fb, - .atomic_check = dm_plane_atomic_check, - .atomic_async_check = dm_plane_atomic_async_check, - .atomic_async_update = dm_plane_atomic_async_update -}; - -/* - * TODO: these are currently initialized to rgb formats only. 
- * For future use cases we should either initialize them dynamically based on - * plane capabilities, or initialize this array to all formats, so internal drm - * check will succeed, and let DC implement proper check - */ -static const uint32_t rgb_formats[] = { - DRM_FORMAT_XRGB8888, - DRM_FORMAT_ARGB8888, - DRM_FORMAT_RGBA8888, - DRM_FORMAT_XRGB2101010, - DRM_FORMAT_XBGR2101010, - DRM_FORMAT_ARGB2101010, - DRM_FORMAT_ABGR2101010, - DRM_FORMAT_XRGB16161616, - DRM_FORMAT_XBGR16161616, - DRM_FORMAT_ARGB16161616, - DRM_FORMAT_ABGR16161616, - DRM_FORMAT_XBGR8888, - DRM_FORMAT_ABGR8888, - DRM_FORMAT_RGB565, -}; - -static const uint32_t overlay_formats[] = { - DRM_FORMAT_XRGB8888, - DRM_FORMAT_ARGB8888, - DRM_FORMAT_RGBA8888, - DRM_FORMAT_XBGR8888, - DRM_FORMAT_ABGR8888, - DRM_FORMAT_RGB565 -}; - -static const u32 cursor_formats[] = { - DRM_FORMAT_ARGB8888 -}; - -static int get_plane_formats(const struct drm_plane *plane, - const struct dc_plane_cap *plane_cap, - uint32_t *formats, int max_formats) -{ - int i, num_formats = 0; - - /* - * TODO: Query support for each group of formats directly from - * DC plane caps. This will require adding more formats to the - * caps list. 
- */ - - switch (plane->type) { - case DRM_PLANE_TYPE_PRIMARY: - for (i = 0; i < ARRAY_SIZE(rgb_formats); ++i) { - if (num_formats >= max_formats) - break; - - formats[num_formats++] = rgb_formats[i]; - } - - if (plane_cap && plane_cap->pixel_format_support.nv12) - formats[num_formats++] = DRM_FORMAT_NV12; - if (plane_cap && plane_cap->pixel_format_support.p010) - formats[num_formats++] = DRM_FORMAT_P010; - if (plane_cap && plane_cap->pixel_format_support.fp16) { - formats[num_formats++] = DRM_FORMAT_XRGB16161616F; - formats[num_formats++] = DRM_FORMAT_ARGB16161616F; - formats[num_formats++] = DRM_FORMAT_XBGR16161616F; - formats[num_formats++] = DRM_FORMAT_ABGR16161616F; - } - break; - - case DRM_PLANE_TYPE_OVERLAY: - for (i = 0; i < ARRAY_SIZE(overlay_formats); ++i) { - if (num_formats >= max_formats) - break; - - formats[num_formats++] = overlay_formats[i]; - } - break; - - case DRM_PLANE_TYPE_CURSOR: - for (i = 0; i < ARRAY_SIZE(cursor_formats); ++i) { - if (num_formats >= max_formats) - break; - - formats[num_formats++] = cursor_formats[i]; - } - break; - } - - return num_formats; -} - -static int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, - struct drm_plane *plane, - unsigned long possible_crtcs, - const struct dc_plane_cap *plane_cap) -{ - uint32_t formats[32]; - int num_formats; - int res = -EPERM; - unsigned int supported_rotations; - uint64_t *modifiers = NULL; - - num_formats = get_plane_formats(plane, plane_cap, formats, - ARRAY_SIZE(formats)); - - res = get_plane_modifiers(dm->adev, plane->type, &modifiers); - if (res) - return res; - - if (modifiers == NULL) - adev_to_drm(dm->adev)->mode_config.fb_modifiers_not_supported = true; - - res = drm_universal_plane_init(adev_to_drm(dm->adev), plane, possible_crtcs, - &dm_plane_funcs, formats, num_formats, - modifiers, plane->type, NULL); - kfree(modifiers); - if (res) - return res; - - if (plane->type == DRM_PLANE_TYPE_OVERLAY && - plane_cap && plane_cap->per_pixel_alpha) { - unsigned int 
blend_caps = BIT(DRM_MODE_BLEND_PIXEL_NONE) | - BIT(DRM_MODE_BLEND_PREMULTI) | - BIT(DRM_MODE_BLEND_COVERAGE); - - drm_plane_create_alpha_property(plane); - drm_plane_create_blend_mode_property(plane, blend_caps); - } - - if (plane->type == DRM_PLANE_TYPE_PRIMARY && - plane_cap && - (plane_cap->pixel_format_support.nv12 || - plane_cap->pixel_format_support.p010)) { - /* This only affects YUV formats. */ - drm_plane_create_color_properties( - plane, - BIT(DRM_COLOR_YCBCR_BT601) | - BIT(DRM_COLOR_YCBCR_BT709) | - BIT(DRM_COLOR_YCBCR_BT2020), - BIT(DRM_COLOR_YCBCR_LIMITED_RANGE) | - BIT(DRM_COLOR_YCBCR_FULL_RANGE), - DRM_COLOR_YCBCR_BT709, DRM_COLOR_YCBCR_LIMITED_RANGE); - } - - supported_rotations = - DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_90 | - DRM_MODE_ROTATE_180 | DRM_MODE_ROTATE_270; - - if (dm->adev->asic_type >= CHIP_BONAIRE && - plane->type != DRM_PLANE_TYPE_CURSOR) - drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0, - supported_rotations); - - drm_plane_helper_add(plane, &dm_plane_helper_funcs); - - /* Create (reset) the plane state */ - if (plane->funcs->reset) - plane->funcs->reset(plane); - - return 0; -} - static int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, struct drm_plane *plane, uint32_t crtc_index) @@ -9084,114 +7730,6 @@ static void remove_stream(struct amdgpu_device *adev, acrtc->enabled = false; } -static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc, - struct dc_cursor_position *position) -{ - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - int x, y; - int xorigin = 0, yorigin = 0; - - if (!crtc || !plane->state->fb) - return 0; - - if ((plane->state->crtc_w > amdgpu_crtc->max_cursor_width) || - (plane->state->crtc_h > amdgpu_crtc->max_cursor_height)) { - DRM_ERROR("%s: bad cursor width or height %d x %d\n", - __func__, - plane->state->crtc_w, - plane->state->crtc_h); - return -EINVAL; - } - - x = plane->state->crtc_x; - y = plane->state->crtc_y; - - if (x <= -amdgpu_crtc->max_cursor_width 
|| - y <= -amdgpu_crtc->max_cursor_height) - return 0; - - if (x < 0) { - xorigin = min(-x, amdgpu_crtc->max_cursor_width - 1); - x = 0; - } - if (y < 0) { - yorigin = min(-y, amdgpu_crtc->max_cursor_height - 1); - y = 0; - } - position->enable = true; - position->translate_by_source = true; - position->x = x; - position->y = y; - position->x_hotspot = xorigin; - position->y_hotspot = yorigin; - - return 0; -} - -static void handle_cursor_update(struct drm_plane *plane, - struct drm_plane_state *old_plane_state) -{ - struct amdgpu_device *adev = drm_to_adev(plane->dev); - struct amdgpu_framebuffer *afb = to_amdgpu_framebuffer(plane->state->fb); - struct drm_crtc *crtc = afb ? plane->state->crtc : old_plane_state->crtc; - struct dm_crtc_state *crtc_state = crtc ? to_dm_crtc_state(crtc->state) : NULL; - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - uint64_t address = afb ? afb->address : 0; - struct dc_cursor_position position = {0}; - struct dc_cursor_attributes attributes; - int ret; - - if (!plane->state->fb && !old_plane_state->fb) - return; - - DC_LOG_CURSOR("%s: crtc_id=%d with size %d to %d\n", - __func__, - amdgpu_crtc->crtc_id, - plane->state->crtc_w, - plane->state->crtc_h); - - ret = get_cursor_position(plane, crtc, &position); - if (ret) - return; - - if (!position.enable) { - /* turn off cursor */ - if (crtc_state && crtc_state->stream) { - mutex_lock(&adev->dm.dc_lock); - dc_stream_set_cursor_position(crtc_state->stream, - &position); - mutex_unlock(&adev->dm.dc_lock); - } - return; - } - - amdgpu_crtc->cursor_width = plane->state->crtc_w; - amdgpu_crtc->cursor_height = plane->state->crtc_h; - - memset(&attributes, 0, sizeof(attributes)); - attributes.address.high_part = upper_32_bits(address); - attributes.address.low_part = lower_32_bits(address); - attributes.width = plane->state->crtc_w; - attributes.height = plane->state->crtc_h; - attributes.color_format = CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA; - attributes.rotation_angle = 0; - 
attributes.attribute_flags.value = 0; - - attributes.pitch = afb->base.pitches[0] / afb->base.format->cpp[0]; - - if (crtc_state->stream) { - mutex_lock(&adev->dm.dc_lock); - if (!dc_stream_set_cursor_attributes(crtc_state->stream, - &attributes)) - DRM_ERROR("DC failed to set cursor attributes\n"); - - if (!dc_stream_set_cursor_position(crtc_state->stream, - &position)) - DRM_ERROR("DC failed to set cursor position\n"); - mutex_unlock(&adev->dm.dc_lock); - } -} - static void prepare_flip_isr(struct amdgpu_crtc *acrtc) { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c new file mode 100644 index 000000000000..4702a53a0bf9 --- /dev/null +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -0,0 +1,1645 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#include +#include +#include +#include +#include + +#include "amdgpu.h" +#include "dal_asic_id.h" +#include "amdgpu_display.h" +#include "amdgpu_dm_trace.h" +#include "gc/gc_11_0_0_offset.h" +#include "gc/gc_11_0_0_sh_mask.h" + +/* + * TODO: these are currently initialized to rgb formats only. + * For future use cases we should either initialize them dynamically based on + * plane capabilities, or initialize this array to all formats, so internal drm + * check will succeed, and let DC implement proper check + */ +static const uint32_t rgb_formats[] = { + DRM_FORMAT_XRGB8888, + DRM_FORMAT_ARGB8888, + DRM_FORMAT_RGBA8888, + DRM_FORMAT_XRGB2101010, + DRM_FORMAT_XBGR2101010, + DRM_FORMAT_ARGB2101010, + DRM_FORMAT_ABGR2101010, + DRM_FORMAT_XRGB16161616, + DRM_FORMAT_XBGR16161616, + DRM_FORMAT_ARGB16161616, + DRM_FORMAT_ABGR16161616, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_ABGR8888, + DRM_FORMAT_RGB565, +}; + +static const uint32_t overlay_formats[] = { + DRM_FORMAT_XRGB8888, + DRM_FORMAT_ARGB8888, + DRM_FORMAT_RGBA8888, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_ABGR8888, + DRM_FORMAT_RGB565 +}; + +static const u32 cursor_formats[] = { + DRM_FORMAT_ARGB8888 +}; + +enum dm_micro_swizzle { + MICRO_SWIZZLE_Z = 0, + MICRO_SWIZZLE_S = 1, + MICRO_SWIZZLE_D = 2, + MICRO_SWIZZLE_R = 3 +}; + +const struct drm_format_info *amd_get_format_info(const struct drm_mode_fb_cmd2 *cmd) +{ + return amdgpu_lookup_format_info(cmd->pixel_format, cmd->modifier[0]); +} + +void fill_blending_from_plane_state(const struct drm_plane_state *plane_state, + bool *per_pixel_alpha, bool *pre_multiplied_alpha, + bool *global_alpha, int *global_alpha_value) +{ + *per_pixel_alpha = false; + *pre_multiplied_alpha = true; + *global_alpha = false; + *global_alpha_value = 0xff; + + if (plane_state->plane->type != DRM_PLANE_TYPE_OVERLAY) + return; + + if (plane_state->pixel_blend_mode == DRM_MODE_BLEND_PREMULTI || + plane_state->pixel_blend_mode == DRM_MODE_BLEND_COVERAGE) { + static 
const uint32_t alpha_formats[] = {
+			DRM_FORMAT_ARGB8888,
+			DRM_FORMAT_RGBA8888,
+			DRM_FORMAT_ABGR8888,
+		};
+		uint32_t format = plane_state->fb->format->format;
+		unsigned int i;
+
+		for (i = 0; i < ARRAY_SIZE(alpha_formats); ++i) {
+			if (format == alpha_formats[i]) {
+				*per_pixel_alpha = true;
+				break;
+			}
+		}
+
+		if (*per_pixel_alpha && plane_state->pixel_blend_mode == DRM_MODE_BLEND_COVERAGE)
+			*pre_multiplied_alpha = false;
+	}
+
+	if (plane_state->alpha < 0xffff) {
+		*global_alpha = true;
+		*global_alpha_value = plane_state->alpha >> 8;
+	}
+}
+
+/*
+ * Append @mod to the growable modifier array *@mods.
+ *
+ * @size is the number of entries in use and @cap the allocated capacity, both
+ * counted in elements; the array is doubled when it is full. A NULL *@mods
+ * means an earlier allocation failed, in which case the call is a no-op. If
+ * growing fails, the array is freed and *@mods is set to NULL so that callers
+ * can check for failure once, after the last append.
+ */
+static void add_modifier(uint64_t **mods, uint64_t *size, uint64_t *cap, uint64_t mod)
+{
+	if (!*mods)
+		return;
+
+	if (*size >= *cap) {
+		uint64_t new_cap = *cap * 2;
+		uint64_t *new_mods;
+
+		/* krealloc_array() checks new_cap * sizeof() for overflow. */
+		new_mods = krealloc_array(*mods, new_cap, sizeof(**mods), GFP_KERNEL);
+		if (!new_mods) {
+			/* On failure the old buffer is still allocated; drop it. */
+			kfree(*mods);
+			*mods = NULL;
+			return;
+		}
+
+		*mods = new_mods;
+		*cap = new_cap;
+	}
+
+	(*mods)[*size] = mod;
+	*size += 1;
+}
+
+bool modifier_has_dcc(uint64_t modifier)
+{
+	return IS_AMD_FMT_MOD(modifier) && AMD_FMT_MOD_GET(DCC, modifier);
+}
+
+unsigned modifier_gfx9_swizzle_mode(uint64_t modifier)
+{
+	if (modifier == DRM_FORMAT_MOD_LINEAR)
+		return 0;
+
+	return AMD_FMT_MOD_GET(TILE, modifier);
+}
+
+static void fill_gfx8_tiling_info_from_flags(union dc_tiling_info *tiling_info,
+					     uint64_t tiling_flags)
+{
+	/* Fill GFX8 params */
+	if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == DC_ARRAY_2D_TILED_THIN1) {
+		unsigned int bankw, bankh, mtaspect, tile_split, num_banks;
+
+		bankw = AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
+		bankh = AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
+		mtaspect = AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
+		tile_split = AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT);
+		num_banks = AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
+
+		/* XXX fix me for VI */
+		tiling_info->gfx8.num_banks = num_banks;
+		tiling_info->gfx8.array_mode =
+
DC_ARRAY_2D_TILED_THIN1; + tiling_info->gfx8.tile_split = tile_split; + tiling_info->gfx8.bank_width = bankw; + tiling_info->gfx8.bank_height = bankh; + tiling_info->gfx8.tile_aspect = mtaspect; + tiling_info->gfx8.tile_mode = + DC_ADDR_SURF_MICRO_TILING_DISPLAY; + } else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) + == DC_ARRAY_1D_TILED_THIN1) { + tiling_info->gfx8.array_mode = DC_ARRAY_1D_TILED_THIN1; + } + + tiling_info->gfx8.pipe_config = + AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG); +} + +static void fill_gfx9_tiling_info_from_device(const struct amdgpu_device *adev, + union dc_tiling_info *tiling_info) +{ + /* Fill GFX9 params */ + tiling_info->gfx9.num_pipes = + adev->gfx.config.gb_addr_config_fields.num_pipes; + tiling_info->gfx9.num_banks = + adev->gfx.config.gb_addr_config_fields.num_banks; + tiling_info->gfx9.pipe_interleave = + adev->gfx.config.gb_addr_config_fields.pipe_interleave_size; + tiling_info->gfx9.num_shader_engines = + adev->gfx.config.gb_addr_config_fields.num_se; + tiling_info->gfx9.max_compressed_frags = + adev->gfx.config.gb_addr_config_fields.max_compress_frags; + tiling_info->gfx9.num_rb_per_se = + adev->gfx.config.gb_addr_config_fields.num_rb_per_se; + tiling_info->gfx9.shaderEnable = 1; + if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0)) + tiling_info->gfx9.num_pkrs = adev->gfx.config.gb_addr_config_fields.num_pkrs; +} + +static void fill_gfx9_tiling_info_from_modifier(const struct amdgpu_device *adev, + union dc_tiling_info *tiling_info, + uint64_t modifier) +{ + unsigned int mod_bank_xor_bits = AMD_FMT_MOD_GET(BANK_XOR_BITS, modifier); + unsigned int mod_pipe_xor_bits = AMD_FMT_MOD_GET(PIPE_XOR_BITS, modifier); + unsigned int pkrs_log2 = AMD_FMT_MOD_GET(PACKERS, modifier); + unsigned int pipes_log2; + + pipes_log2 = min(5u, mod_pipe_xor_bits); + + fill_gfx9_tiling_info_from_device(adev, tiling_info); + + if (!IS_AMD_FMT_MOD(modifier)) + return; + + tiling_info->gfx9.num_pipes = 1u << pipes_log2; + 
tiling_info->gfx9.num_shader_engines = 1u << (mod_pipe_xor_bits - pipes_log2); + + if (adev->family >= AMDGPU_FAMILY_NV) { + tiling_info->gfx9.num_pkrs = 1u << pkrs_log2; + } else { + tiling_info->gfx9.num_banks = 1u << mod_bank_xor_bits; + + /* for DCC we know it isn't rb aligned, so rb_per_se doesn't matter. */ + } +} + +static int validate_dcc(struct amdgpu_device *adev, + const enum surface_pixel_format format, + const enum dc_rotation_angle rotation, + const union dc_tiling_info *tiling_info, + const struct dc_plane_dcc_param *dcc, + const struct dc_plane_address *address, + const struct plane_size *plane_size) +{ + struct dc *dc = adev->dm.dc; + struct dc_dcc_surface_param input; + struct dc_surface_dcc_cap output; + + memset(&input, 0, sizeof(input)); + memset(&output, 0, sizeof(output)); + + if (!dcc->enable) + return 0; + + if (format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || + !dc->cap_funcs.get_dcc_compression_cap) + return -EINVAL; + + input.format = format; + input.surface_size.width = plane_size->surface_size.width; + input.surface_size.height = plane_size->surface_size.height; + input.swizzle_mode = tiling_info->gfx9.swizzle; + + if (rotation == ROTATION_ANGLE_0 || rotation == ROTATION_ANGLE_180) + input.scan = SCAN_DIRECTION_HORIZONTAL; + else if (rotation == ROTATION_ANGLE_90 || rotation == ROTATION_ANGLE_270) + input.scan = SCAN_DIRECTION_VERTICAL; + + if (!dc->cap_funcs.get_dcc_compression_cap(dc, &input, &output)) + return -EINVAL; + + if (!output.capable) + return -EINVAL; + + if (dcc->independent_64b_blks == 0 && + output.grph.rgb.independent_64b_blks != 0) + return -EINVAL; + + return 0; +} + +static int fill_gfx9_plane_attributes_from_modifiers(struct amdgpu_device *adev, + const struct amdgpu_framebuffer *afb, + const enum surface_pixel_format format, + const enum dc_rotation_angle rotation, + const struct plane_size *plane_size, + union dc_tiling_info *tiling_info, + struct dc_plane_dcc_param *dcc, + struct dc_plane_address *address, + const 
bool force_disable_dcc) +{ + const uint64_t modifier = afb->base.modifier; + int ret = 0; + + fill_gfx9_tiling_info_from_modifier(adev, tiling_info, modifier); + tiling_info->gfx9.swizzle = modifier_gfx9_swizzle_mode(modifier); + + if (modifier_has_dcc(modifier) && !force_disable_dcc) { + uint64_t dcc_address = afb->address + afb->base.offsets[1]; + bool independent_64b_blks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_64B, modifier); + bool independent_128b_blks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_128B, modifier); + + dcc->enable = 1; + dcc->meta_pitch = afb->base.pitches[1]; + dcc->independent_64b_blks = independent_64b_blks; + if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) >= AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) { + if (independent_64b_blks && independent_128b_blks) + dcc->dcc_ind_blk = hubp_ind_block_64b_no_128bcl; + else if (independent_128b_blks) + dcc->dcc_ind_blk = hubp_ind_block_128b; + else if (independent_64b_blks && !independent_128b_blks) + dcc->dcc_ind_blk = hubp_ind_block_64b; + else + dcc->dcc_ind_blk = hubp_ind_block_unconstrained; + } else { + if (independent_64b_blks) + dcc->dcc_ind_blk = hubp_ind_block_64b; + else + dcc->dcc_ind_blk = hubp_ind_block_unconstrained; + } + + address->grph.meta_addr.low_part = lower_32_bits(dcc_address); + address->grph.meta_addr.high_part = upper_32_bits(dcc_address); + } + + ret = validate_dcc(adev, format, rotation, tiling_info, dcc, address, plane_size); + if (ret) + drm_dbg_kms(adev_to_drm(adev), "validate_dcc: returned error: %d\n", ret); + + return ret; +} + +static void add_gfx10_1_modifiers(const struct amdgpu_device *adev, + uint64_t **mods, uint64_t *size, uint64_t *capacity) +{ + int pipe_xor_bits = ilog2(adev->gfx.config.gb_addr_config_fields.num_pipes); + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(DCC, 1) | + 
AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | + AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B)); + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(DCC, 1) | + AMD_FMT_MOD_SET(DCC_RETILE, 1) | + AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | + AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B)); + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits)); + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits)); + + + /* Only supported for 64bpp, will be filtered in dm_plane_format_mod_supported */ + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9)); + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9)); +} + +static void add_gfx9_modifiers(const struct amdgpu_device *adev, + uint64_t **mods, uint64_t *size, uint64_t *capacity) +{ + int pipes = ilog2(adev->gfx.config.gb_addr_config_fields.num_pipes); + int pipe_xor_bits = min(8, pipes + + ilog2(adev->gfx.config.gb_addr_config_fields.num_se)); + int bank_xor_bits = min(8 - pipe_xor_bits, + ilog2(adev->gfx.config.gb_addr_config_fields.num_banks)); + int rb = ilog2(adev->gfx.config.gb_addr_config_fields.num_se) + + 
ilog2(adev->gfx.config.gb_addr_config_fields.num_rb_per_se); + + + if (adev->family == AMDGPU_FAMILY_RV) { + /* Raven2 and later */ + bool has_constant_encode = adev->asic_type > CHIP_RAVEN || adev->external_rev_id >= 0x81; + + /* + * No _D DCC swizzles yet because we only allow 32bpp, which + * doesn't support _D on DCN + */ + + if (has_constant_encode) { + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits) | + AMD_FMT_MOD_SET(DCC, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | + AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) | + AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1)); + } + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits) | + AMD_FMT_MOD_SET(DCC, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | + AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) | + AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 0)); + + if (has_constant_encode) { + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits) | + AMD_FMT_MOD_SET(DCC, 1) | + AMD_FMT_MOD_SET(DCC_RETILE, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | + AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) | + + AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | + AMD_FMT_MOD_SET(RB, rb) | + AMD_FMT_MOD_SET(PIPE, pipes)); + } + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | + 
AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits) | + AMD_FMT_MOD_SET(DCC, 1) | + AMD_FMT_MOD_SET(DCC_RETILE, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | + AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) | + AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 0) | + AMD_FMT_MOD_SET(RB, rb) | + AMD_FMT_MOD_SET(PIPE, pipes)); + } + + /* + * Only supported for 64bpp on Raven, will be filtered on format in + * dm_plane_format_mod_supported. + */ + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits)); + + if (adev->family == AMDGPU_FAMILY_RV) { + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits)); + } + + /* + * Only supported for 64bpp on Raven, will be filtered on format in + * dm_plane_format_mod_supported. 
+ */ + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9)); + + if (adev->family == AMDGPU_FAMILY_RV) { + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9)); + } +} + +static void add_gfx10_3_modifiers(const struct amdgpu_device *adev, + uint64_t **mods, uint64_t *size, uint64_t *capacity) +{ + int pipe_xor_bits = ilog2(adev->gfx.config.gb_addr_config_fields.num_pipes); + int pkrs = ilog2(adev->gfx.config.gb_addr_config_fields.num_pkrs); + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(PACKERS, pkrs) | + AMD_FMT_MOD_SET(DCC, 1) | + AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) | + AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B)); + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(PACKERS, pkrs) | + AMD_FMT_MOD_SET(DCC, 1) | + AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) | + AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B)); + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(PACKERS, pkrs) | + AMD_FMT_MOD_SET(DCC, 1) | + AMD_FMT_MOD_SET(DCC_RETILE, 1) | + AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | + 
AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) | + AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B)); + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(PACKERS, pkrs) | + AMD_FMT_MOD_SET(DCC, 1) | + AMD_FMT_MOD_SET(DCC_RETILE, 1) | + AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) | + AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B)); + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(PACKERS, pkrs)); + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(PACKERS, pkrs)); + + /* Only supported for 64bpp, will be filtered in dm_plane_format_mod_supported */ + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9)); + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9)); +} + +/* + * Append the GFX11 modifiers to the list. NUM_PKRS and NUM_PIPES are decoded + * from the GB_ADDR_CONFIG register instead of the cached gfx config (see the + * TODO below). For each of the two R_X swizzles, preferred one first, this + * adds the "best" DCC modifier, the 4K DCC modifier, both again with + * DCC_RETILE set, and then the plain R_X modifier; a 64K_D entry comes last. + */ +static void add_gfx11_modifiers(struct amdgpu_device *adev, + uint64_t **mods, uint64_t *size, uint64_t *capacity) +{ + int num_pipes = 0; + int pipe_xor_bits = 0; + int num_pkrs = 0; + int pkrs = 0; + u32 gb_addr_config; + u8 i = 0; + unsigned swizzle_r_x; + uint64_t modifier_r_x; + uint64_t modifier_dcc_best; + uint64_t modifier_dcc_4k; + + /* TODO: GFX11 IP HW init hasn't finished yet, so we
get zero if we read from + * adev->gfx.config.gb_addr_config_fields.num_{pkrs,pipes} + */ + gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG); + ASSERT(gb_addr_config != 0); + + num_pkrs = 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS); + pkrs = ilog2(num_pkrs); + num_pipes = 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PIPES); + pipe_xor_bits = ilog2(num_pipes); + + for (i = 0; i < 2; i++) { + /* Insert the best one first. */ + /* R_X swizzle modes are the best for rendering and DCC requires them. */ + if (num_pipes > 16) + swizzle_r_x = !i ? AMD_FMT_MOD_TILE_GFX11_256K_R_X : AMD_FMT_MOD_TILE_GFX9_64K_R_X; + else + swizzle_r_x = !i ? AMD_FMT_MOD_TILE_GFX9_64K_R_X : AMD_FMT_MOD_TILE_GFX11_256K_R_X; + + modifier_r_x = AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX11) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(TILE, swizzle_r_x) | + AMD_FMT_MOD_SET(PACKERS, pkrs); + + /* DCC_CONSTANT_ENCODE is not set because it can't vary with gfx11 (it's implied to be 1). */ + modifier_dcc_best = modifier_r_x | AMD_FMT_MOD_SET(DCC, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 0) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) | + AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B); + + /* DCC settings for 4K and greater resolutions.
(required by display hw) */ + modifier_dcc_4k = modifier_r_x | AMD_FMT_MOD_SET(DCC, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) | + AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B); + + add_modifier(mods, size, capacity, modifier_dcc_best); + add_modifier(mods, size, capacity, modifier_dcc_4k); + + add_modifier(mods, size, capacity, modifier_dcc_best | AMD_FMT_MOD_SET(DCC_RETILE, 1)); + add_modifier(mods, size, capacity, modifier_dcc_4k | AMD_FMT_MOD_SET(DCC_RETILE, 1)); + + add_modifier(mods, size, capacity, modifier_r_x); + } + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX11) | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D)); +} + +/* + * Build the format-modifier list for @plane_type in *@mods. Pre-GFX9 + * families leave *@mods NULL and return 0. Cursor planes get only LINEAR + * followed by the INVALID terminator. + */ +static int get_plane_modifiers(struct amdgpu_device *adev, unsigned int plane_type, uint64_t **mods) +{ + uint64_t size = 0, capacity = 128; + *mods = NULL; + + /* We have not hooked up any pre-GFX9 modifiers. */ + if (adev->family < AMDGPU_FAMILY_AI) + return 0; + + *mods = kmalloc(capacity * sizeof(uint64_t), GFP_KERNEL); + + if (plane_type == DRM_PLANE_TYPE_CURSOR) { + add_modifier(mods, &size, &capacity, DRM_FORMAT_MOD_LINEAR); + add_modifier(mods, &size, &capacity, DRM_FORMAT_MOD_INVALID); + return *mods ?
0 : -ENOMEM;
+	}
+
+	switch (adev->family) {
+	case AMDGPU_FAMILY_AI:
+	case AMDGPU_FAMILY_RV:
+		add_gfx9_modifiers(adev, mods, &size, &capacity);
+		break;
+	case AMDGPU_FAMILY_NV:
+	case AMDGPU_FAMILY_VGH:
+	case AMDGPU_FAMILY_YC:
+	case AMDGPU_FAMILY_GC_10_3_6:
+	case AMDGPU_FAMILY_GC_10_3_7:
+		if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0))
+			add_gfx10_3_modifiers(adev, mods, &size, &capacity);
+		else
+			add_gfx10_1_modifiers(adev, mods, &size, &capacity);
+		break;
+	case AMDGPU_FAMILY_GC_11_0_0:
+	case AMDGPU_FAMILY_GC_11_0_2:
+		add_gfx11_modifiers(adev, mods, &size, &capacity);
+		break;
+	}
+
+	add_modifier(mods, &size, &capacity, DRM_FORMAT_MOD_LINEAR);
+
+	/* INVALID marks the end of the list. */
+	add_modifier(mods, &size, &capacity, DRM_FORMAT_MOD_INVALID);
+
+	if (!*mods)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/*
+ * Fill @formats with the DRM fourcc codes advertised for @plane.
+ *
+ * The base list is chosen by plane->type. Primary planes additionally get
+ * NV12, P010 and the FP16 formats when @plane_cap reports support for them;
+ * @plane_cap may be NULL. At most @max_formats entries are written, and that
+ * limit applies to the capability-dependent formats as well as to the tables.
+ *
+ * Returns the number of entries written to @formats.
+ */
+static int get_plane_formats(const struct drm_plane *plane,
+			     const struct dc_plane_cap *plane_cap,
+			     uint32_t *formats, int max_formats)
+{
+	static const uint32_t fp16_formats[] = {
+		DRM_FORMAT_XRGB16161616F,
+		DRM_FORMAT_ARGB16161616F,
+		DRM_FORMAT_XBGR16161616F,
+		DRM_FORMAT_ABGR16161616F,
+	};
+	int i, num_formats = 0;
+
+	/*
+	 * TODO: Query support for each group of formats directly from
+	 * DC plane caps. This will require adding more formats to the
+	 * caps list.
+	 */
+
+	switch (plane->type) {
+	case DRM_PLANE_TYPE_PRIMARY:
+		for (i = 0; i < ARRAY_SIZE(rgb_formats); ++i) {
+			if (num_formats >= max_formats)
+				break;
+
+			formats[num_formats++] = rgb_formats[i];
+		}
+
+		/* Optional formats must respect the caller's limit as well. */
+		if (plane_cap && plane_cap->pixel_format_support.nv12 &&
+		    num_formats < max_formats)
+			formats[num_formats++] = DRM_FORMAT_NV12;
+		if (plane_cap && plane_cap->pixel_format_support.p010 &&
+		    num_formats < max_formats)
+			formats[num_formats++] = DRM_FORMAT_P010;
+		if (plane_cap && plane_cap->pixel_format_support.fp16) {
+			for (i = 0; i < ARRAY_SIZE(fp16_formats); ++i) {
+				if (num_formats >= max_formats)
+					break;
+
+				formats[num_formats++] = fp16_formats[i];
+			}
+		}
+		break;
+
+	case DRM_PLANE_TYPE_OVERLAY:
+		for (i = 0; i < ARRAY_SIZE(overlay_formats); ++i) {
+			if (num_formats >= max_formats)
+				break;
+
+			formats[num_formats++] = overlay_formats[i];
+		}
+		break;
+
+	case DRM_PLANE_TYPE_CURSOR:
+		for (i = 0; i < ARRAY_SIZE(cursor_formats); ++i) {
+			if (num_formats >= max_formats)
+				break;
+
+			formats[num_formats++] = cursor_formats[i];
+		}
+		break;
+	}
+
+	return num_formats;
+}
+
+#ifdef CONFIG_DRM_AMD_DC_HDR
+static int attach_color_mgmt_properties(struct amdgpu_display_manager *dm, struct drm_plane *plane)
+{
+	drm_object_attach_property(&plane->base,
+				   dm->degamma_lut_property,
+				   0);
+	drm_object_attach_property(&plane->base,
+				   dm->degamma_lut_size_property,
+				   MAX_COLOR_LUT_ENTRIES);
+	drm_object_attach_property(&plane->base, dm->ctm_property,
+				   0);
+	drm_object_attach_property(&plane->base, dm->sdr_boost_property,
+				   DEFAULT_SDR_BOOST);
+
+	return 0;
+}
+#endif
+
+int fill_plane_buffer_attributes(struct amdgpu_device *adev,
+			     const struct amdgpu_framebuffer *afb,
+			     const enum surface_pixel_format format,
+			     const enum dc_rotation_angle rotation,
+			     const uint64_t tiling_flags,
+			     union dc_tiling_info *tiling_info,
+			     struct plane_size *plane_size,
+			     struct dc_plane_dcc_param *dcc,
+			     struct dc_plane_address *address,
+			     bool tmz_surface,
+			     bool
force_disable_dcc) +{ + const struct drm_framebuffer *fb = &afb->base; + int ret; + + memset(tiling_info, 0, sizeof(*tiling_info)); + memset(plane_size, 0, sizeof(*plane_size)); + memset(dcc, 0, sizeof(*dcc)); + memset(address, 0, sizeof(*address)); + + address->tmz_surface = tmz_surface; + + if (format < SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) { + uint64_t addr = afb->address + fb->offsets[0]; + + plane_size->surface_size.x = 0; + plane_size->surface_size.y = 0; + plane_size->surface_size.width = fb->width; + plane_size->surface_size.height = fb->height; + plane_size->surface_pitch = + fb->pitches[0] / fb->format->cpp[0]; + + address->type = PLN_ADDR_TYPE_GRAPHICS; + address->grph.addr.low_part = lower_32_bits(addr); + address->grph.addr.high_part = upper_32_bits(addr); + } else if (format < SURFACE_PIXEL_FORMAT_INVALID) { + uint64_t luma_addr = afb->address + fb->offsets[0]; + uint64_t chroma_addr = afb->address + fb->offsets[1]; + + plane_size->surface_size.x = 0; + plane_size->surface_size.y = 0; + plane_size->surface_size.width = fb->width; + plane_size->surface_size.height = fb->height; + plane_size->surface_pitch = + fb->pitches[0] / fb->format->cpp[0]; + + plane_size->chroma_size.x = 0; + plane_size->chroma_size.y = 0; + /* TODO: set these based on surface format */ + plane_size->chroma_size.width = fb->width / 2; + plane_size->chroma_size.height = fb->height / 2; + + plane_size->chroma_pitch = + fb->pitches[1] / fb->format->cpp[1]; + + address->type = PLN_ADDR_TYPE_VIDEO_PROGRESSIVE; + address->video_progressive.luma_addr.low_part = + lower_32_bits(luma_addr); + address->video_progressive.luma_addr.high_part = + upper_32_bits(luma_addr); + address->video_progressive.chroma_addr.low_part = + lower_32_bits(chroma_addr); + address->video_progressive.chroma_addr.high_part = + upper_32_bits(chroma_addr); + } + + if (adev->family >= AMDGPU_FAMILY_AI) { + ret = fill_gfx9_plane_attributes_from_modifiers(adev, afb, format, + rotation, plane_size, + tiling_info, dcc, + 
address, + force_disable_dcc); + if (ret) + return ret; + } else { + fill_gfx8_tiling_info_from_flags(tiling_info, tiling_flags); + } + + return 0; +} + +static int dm_plane_helper_prepare_fb(struct drm_plane *plane, + struct drm_plane_state *new_state) +{ + struct amdgpu_framebuffer *afb; + struct drm_gem_object *obj; + struct amdgpu_device *adev; + struct amdgpu_bo *rbo; + struct dm_plane_state *dm_plane_state_new, *dm_plane_state_old; + uint32_t domain; + int r; + + if (!new_state->fb) { + DRM_DEBUG_KMS("No FB bound\n"); + return 0; + } + + afb = to_amdgpu_framebuffer(new_state->fb); + obj = new_state->fb->obj[0]; + rbo = gem_to_amdgpu_bo(obj); + adev = amdgpu_ttm_adev(rbo->tbo.bdev); + + r = amdgpu_bo_reserve(rbo, true); + if (r) { + dev_err(adev->dev, "fail to reserve bo (%d)\n", r); + return r; + } + + r = dma_resv_reserve_fences(rbo->tbo.base.resv, 1); + if (r) { + dev_err(adev->dev, "reserving fence slot failed (%d)\n", r); + goto error_unlock; + } + + if (plane->type != DRM_PLANE_TYPE_CURSOR) + domain = amdgpu_display_supported_domains(adev, rbo->flags); + else + domain = AMDGPU_GEM_DOMAIN_VRAM; + + r = amdgpu_bo_pin(rbo, domain); + if (unlikely(r != 0)) { + if (r != -ERESTARTSYS) + DRM_ERROR("Failed to pin framebuffer with error %d\n", r); + goto error_unlock; + } + + r = amdgpu_ttm_alloc_gart(&rbo->tbo); + if (unlikely(r != 0)) { + DRM_ERROR("%p bind failed\n", rbo); + goto error_unpin; + } + + r = drm_gem_plane_helper_prepare_fb(plane, new_state); + if (unlikely(r != 0)) + goto error_unpin; + + amdgpu_bo_unreserve(rbo); + + afb->address = amdgpu_bo_gpu_offset(rbo); + + amdgpu_bo_ref(rbo); + + /** + * We don't do surface updates on planes that have been newly created, + * but we also don't have the afb->address during atomic check. + * + * Fill in buffer attributes depending on the address here, but only on + * newly created planes since they're not being used by DC yet and this + * won't modify global state. 
+ */ + dm_plane_state_old = to_dm_plane_state(plane->state); + dm_plane_state_new = to_dm_plane_state(new_state); + + if (dm_plane_state_new->dc_state && + dm_plane_state_old->dc_state != dm_plane_state_new->dc_state) { + struct dc_plane_state *plane_state = + dm_plane_state_new->dc_state; + bool force_disable_dcc = !plane_state->dcc.enable; + + fill_plane_buffer_attributes( + adev, afb, plane_state->format, plane_state->rotation, + afb->tiling_flags, + &plane_state->tiling_info, &plane_state->plane_size, + &plane_state->dcc, &plane_state->address, + afb->tmz_surface, force_disable_dcc); + } + + return 0; + +error_unpin: + amdgpu_bo_unpin(rbo); + +error_unlock: + amdgpu_bo_unreserve(rbo); + return r; +} + +static void dm_plane_helper_cleanup_fb(struct drm_plane *plane, + struct drm_plane_state *old_state) +{ + struct amdgpu_bo *rbo; + int r; + + if (!old_state->fb) + return; + + rbo = gem_to_amdgpu_bo(old_state->fb->obj[0]); + r = amdgpu_bo_reserve(rbo, false); + if (unlikely(r)) { + DRM_ERROR("failed to reserve rbo before unpin\n"); + return; + } + + amdgpu_bo_unpin(rbo); + amdgpu_bo_unreserve(rbo); + amdgpu_bo_unref(&rbo); +} + +static void get_min_max_dc_plane_scaling(struct drm_device *dev, + struct drm_framebuffer *fb, + int *min_downscale, int *max_upscale) +{ + struct amdgpu_device *adev = drm_to_adev(dev); + struct dc *dc = adev->dm.dc; + /* Caps for all supported planes are the same on DCE and DCN 1 - 3 */ + struct dc_plane_cap *plane_cap = &dc->caps.planes[0]; + + switch (fb->format->format) { + case DRM_FORMAT_P010: + case DRM_FORMAT_NV12: + case DRM_FORMAT_NV21: + *max_upscale = plane_cap->max_upscale_factor.nv12; + *min_downscale = plane_cap->max_downscale_factor.nv12; + break; + + case DRM_FORMAT_XRGB16161616F: + case DRM_FORMAT_ARGB16161616F: + case DRM_FORMAT_XBGR16161616F: + case DRM_FORMAT_ABGR16161616F: + *max_upscale = plane_cap->max_upscale_factor.fp16; + *min_downscale = plane_cap->max_downscale_factor.fp16; + break; + + default: + 
*max_upscale = plane_cap->max_upscale_factor.argb8888; + *min_downscale = plane_cap->max_downscale_factor.argb8888; + break; + } + + /* + * A factor of 1 in the plane_cap means to not allow scaling, ie. use a + * scaling factor of 1.0 == 1000 units. + */ + if (*max_upscale == 1) + *max_upscale = 1000; + + if (*min_downscale == 1) + *min_downscale = 1000; +} + +int dm_plane_helper_check_state(struct drm_plane_state *state, + struct drm_crtc_state *new_crtc_state) +{ + struct drm_framebuffer *fb = state->fb; + int min_downscale, max_upscale; + int min_scale = 0; + int max_scale = INT_MAX; + + /* Plane enabled? Validate viewport and get scaling factors from plane caps. */ + if (fb && state->crtc) { + /* Validate viewport to cover the case when only the position changes */ + if (state->plane->type != DRM_PLANE_TYPE_CURSOR) { + int viewport_width = state->crtc_w; + int viewport_height = state->crtc_h; + + if (state->crtc_x < 0) + viewport_width += state->crtc_x; + else if (state->crtc_x + state->crtc_w > new_crtc_state->mode.crtc_hdisplay) + viewport_width = new_crtc_state->mode.crtc_hdisplay - state->crtc_x; + + if (state->crtc_y < 0) + viewport_height += state->crtc_y; + else if (state->crtc_y + state->crtc_h > new_crtc_state->mode.crtc_vdisplay) + viewport_height = new_crtc_state->mode.crtc_vdisplay - state->crtc_y; + + if (viewport_width < 0 || viewport_height < 0) { + DRM_DEBUG_ATOMIC("Plane completely outside of screen\n"); + return -EINVAL; + } else if (viewport_width < MIN_VIEWPORT_SIZE*2) { /* x2 for width is because of pipe-split. */ + DRM_DEBUG_ATOMIC("Viewport width %d smaller than %d\n", viewport_width, MIN_VIEWPORT_SIZE*2); + return -EINVAL; + } else if (viewport_height < MIN_VIEWPORT_SIZE) { + DRM_DEBUG_ATOMIC("Viewport height %d smaller than %d\n", viewport_height, MIN_VIEWPORT_SIZE); + return -EINVAL; + } + + } + + /* Get min/max allowed scaling factors from plane caps. 
*/ + get_min_max_dc_plane_scaling(state->crtc->dev, fb, + &min_downscale, &max_upscale); + /* + * Convert to drm convention: 16.16 fixed point, instead of dc's + * 1.0 == 1000. Also drm scaling is src/dst instead of dc's + * dst/src, so min_scale = 1.0 / max_upscale, etc. + */ + min_scale = (1000 << 16) / max_upscale; + max_scale = (1000 << 16) / min_downscale; + } + + return drm_atomic_helper_check_plane_state( + state, new_crtc_state, min_scale, max_scale, true, true); +} + +int fill_dc_scaling_info(struct amdgpu_device *adev, + const struct drm_plane_state *state, + struct dc_scaling_info *scaling_info) +{ + int scale_w, scale_h, min_downscale, max_upscale; + + memset(scaling_info, 0, sizeof(*scaling_info)); + + /* Source is fixed 16.16 but we ignore mantissa for now... */ + scaling_info->src_rect.x = state->src_x >> 16; + scaling_info->src_rect.y = state->src_y >> 16; + + /* + * For reasons we don't (yet) fully understand a non-zero + * src_y coordinate into an NV12 buffer can cause a + * system hang on DCN1x. + * To avoid hangs (and maybe be overly cautious) + * let's reject both non-zero src_x and src_y. + * + * We currently know of only one use-case to reproduce a + * scenario with non-zero src_x and src_y for NV12, which + * is to gesture the YouTube Android app into full screen + * on ChromeOS. 
+ */ + if (((adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 0)) || + (adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 1))) && + (state->fb && state->fb->format->format == DRM_FORMAT_NV12 && + (scaling_info->src_rect.x != 0 || scaling_info->src_rect.y != 0))) + return -EINVAL; + + scaling_info->src_rect.width = state->src_w >> 16; + if (scaling_info->src_rect.width == 0) + return -EINVAL; + + scaling_info->src_rect.height = state->src_h >> 16; + if (scaling_info->src_rect.height == 0) + return -EINVAL; + + scaling_info->dst_rect.x = state->crtc_x; + scaling_info->dst_rect.y = state->crtc_y; + + if (state->crtc_w == 0) + return -EINVAL; + + scaling_info->dst_rect.width = state->crtc_w; + + if (state->crtc_h == 0) + return -EINVAL; + + scaling_info->dst_rect.height = state->crtc_h; + + /* DRM doesn't specify clipping on destination output. */ + scaling_info->clip_rect = scaling_info->dst_rect; + + /* Validate scaling per-format with DC plane caps */ + if (state->plane && state->plane->dev && state->fb) { + get_min_max_dc_plane_scaling(state->plane->dev, state->fb, + &min_downscale, &max_upscale); + } else { + min_downscale = 250; + max_upscale = 16000; + } + + scale_w = scaling_info->dst_rect.width * 1000 / + scaling_info->src_rect.width; + + if (scale_w < min_downscale || scale_w > max_upscale) + return -EINVAL; + + scale_h = scaling_info->dst_rect.height * 1000 / + scaling_info->src_rect.height; + + if (scale_h < min_downscale || scale_h > max_upscale) + return -EINVAL; + + /* + * The "scaling_quality" can be ignored for now, quality = 0 has DC + * assume reasonable defaults based on the format. 
+ */ + + return 0; +} + +static int dm_plane_atomic_check(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, + plane); + struct amdgpu_device *adev = drm_to_adev(plane->dev); + struct dc *dc = adev->dm.dc; + struct dm_plane_state *dm_plane_state; + struct dc_scaling_info scaling_info; + struct drm_crtc_state *new_crtc_state; + int ret; + + trace_amdgpu_dm_plane_atomic_check(new_plane_state); + + dm_plane_state = to_dm_plane_state(new_plane_state); + + if (!dm_plane_state->dc_state) + return 0; + + new_crtc_state = + drm_atomic_get_new_crtc_state(state, + new_plane_state->crtc); + if (!new_crtc_state) + return -EINVAL; + + ret = dm_plane_helper_check_state(new_plane_state, new_crtc_state); + if (ret) + return ret; + + ret = fill_dc_scaling_info(adev, new_plane_state, &scaling_info); + if (ret) + return ret; + + if (dc_validate_plane(dc, dm_plane_state->dc_state) == DC_OK) + return 0; + + return -EINVAL; +} + +static int dm_plane_atomic_async_check(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + /* Only support async updates on cursor planes. 
*/ + if (plane->type != DRM_PLANE_TYPE_CURSOR) + return -EINVAL; + + return 0; +} + +static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc, + struct dc_cursor_position *position) +{ + struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); + int x, y; + int xorigin = 0, yorigin = 0; + + if (!crtc || !plane->state->fb) + return 0; + + if ((plane->state->crtc_w > amdgpu_crtc->max_cursor_width) || + (plane->state->crtc_h > amdgpu_crtc->max_cursor_height)) { + DRM_ERROR("%s: bad cursor width or height %d x %d\n", + __func__, + plane->state->crtc_w, + plane->state->crtc_h); + return -EINVAL; + } + + x = plane->state->crtc_x; + y = plane->state->crtc_y; + + if (x <= -amdgpu_crtc->max_cursor_width || + y <= -amdgpu_crtc->max_cursor_height) + return 0; + + if (x < 0) { + xorigin = min(-x, amdgpu_crtc->max_cursor_width - 1); + x = 0; + } + if (y < 0) { + yorigin = min(-y, amdgpu_crtc->max_cursor_height - 1); + y = 0; + } + position->enable = true; + position->translate_by_source = true; + position->x = x; + position->y = y; + position->x_hotspot = xorigin; + position->y_hotspot = yorigin; + + return 0; +} + +void handle_cursor_update(struct drm_plane *plane, + struct drm_plane_state *old_plane_state) +{ + struct amdgpu_device *adev = drm_to_adev(plane->dev); + struct amdgpu_framebuffer *afb = to_amdgpu_framebuffer(plane->state->fb); + struct drm_crtc *crtc = afb ? plane->state->crtc : old_plane_state->crtc; + struct dm_crtc_state *crtc_state = crtc ? to_dm_crtc_state(crtc->state) : NULL; + struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); + uint64_t address = afb ? 
afb->address : 0; + struct dc_cursor_position position = {0}; + struct dc_cursor_attributes attributes; + int ret; + + if (!plane->state->fb && !old_plane_state->fb) + return; + + DC_LOG_CURSOR("%s: crtc_id=%d with size %d to %d\n", + __func__, + amdgpu_crtc->crtc_id, + plane->state->crtc_w, + plane->state->crtc_h); + + ret = get_cursor_position(plane, crtc, &position); + if (ret) + return; + + if (!position.enable) { + /* turn off cursor */ + if (crtc_state && crtc_state->stream) { + mutex_lock(&adev->dm.dc_lock); + dc_stream_set_cursor_position(crtc_state->stream, + &position); + mutex_unlock(&adev->dm.dc_lock); + } + return; + } + + amdgpu_crtc->cursor_width = plane->state->crtc_w; + amdgpu_crtc->cursor_height = plane->state->crtc_h; + + memset(&attributes, 0, sizeof(attributes)); + attributes.address.high_part = upper_32_bits(address); + attributes.address.low_part = lower_32_bits(address); + attributes.width = plane->state->crtc_w; + attributes.height = plane->state->crtc_h; + attributes.color_format = CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA; + attributes.rotation_angle = 0; + attributes.attribute_flags.value = 0; + + attributes.pitch = afb->base.pitches[0] / afb->base.format->cpp[0]; + + if (crtc_state->stream) { + mutex_lock(&adev->dm.dc_lock); + if (!dc_stream_set_cursor_attributes(crtc_state->stream, + &attributes)) + DRM_ERROR("DC failed to set cursor attributes\n"); + + if (!dc_stream_set_cursor_position(crtc_state->stream, + &position)) + DRM_ERROR("DC failed to set cursor position\n"); + mutex_unlock(&adev->dm.dc_lock); + } +} + +static void dm_plane_atomic_async_update(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, + plane); + struct drm_plane_state *old_state = + drm_atomic_get_old_plane_state(state, plane); + + trace_amdgpu_dm_atomic_update_cursor(new_state); + + swap(plane->state->fb, new_state->fb); + + plane->state->src_x = new_state->src_x; + 
plane->state->src_y = new_state->src_y;
+	plane->state->src_w = new_state->src_w;
+	plane->state->src_h = new_state->src_h;
+	plane->state->crtc_x = new_state->crtc_x;
+	plane->state->crtc_y = new_state->crtc_y;
+	plane->state->crtc_w = new_state->crtc_w;
+	plane->state->crtc_h = new_state->crtc_h;
+
+	handle_cursor_update(plane, old_state);
+}
+
+static const struct drm_plane_helper_funcs dm_plane_helper_funcs = {
+	.prepare_fb = dm_plane_helper_prepare_fb,
+	.cleanup_fb = dm_plane_helper_cleanup_fb,
+	.atomic_check = dm_plane_atomic_check,
+	.atomic_async_check = dm_plane_atomic_async_check,
+	.atomic_async_update = dm_plane_atomic_async_update
+};
+
+static void dm_drm_plane_reset(struct drm_plane *plane)
+{
+	struct dm_plane_state *amdgpu_state = NULL;
+
+	if (plane->state)
+		plane->funcs->atomic_destroy_state(plane, plane->state);
+
+	amdgpu_state = kzalloc(sizeof(*amdgpu_state), GFP_KERNEL);
+	WARN_ON(amdgpu_state == NULL);
+
+	if (amdgpu_state)
+		__drm_atomic_helper_plane_reset(plane, &amdgpu_state->base);
+#ifdef CONFIG_DRM_AMD_DC_HDR
+	if (amdgpu_state)
+		amdgpu_state->sdr_boost = DEFAULT_SDR_BOOST;
+#endif
+}
+
+/*
+ * Allocate a new dm_plane_state for an atomic update and copy the current
+ * plane->state into it.  The DRM helper is handed only the embedded base
+ * state, so every driver-private member has to be carried over here, each
+ * with its own reference.
+ *
+ * Returns the embedded base state, or NULL if the allocation fails.
+ */
+static struct drm_plane_state *
+dm_drm_plane_duplicate_state(struct drm_plane *plane)
+{
+	struct dm_plane_state *dm_plane_state, *old_dm_plane_state;
+
+	old_dm_plane_state = to_dm_plane_state(plane->state);
+	dm_plane_state = kzalloc(sizeof(*dm_plane_state), GFP_KERNEL);
+	if (!dm_plane_state)
+		return NULL;
+
+	__drm_atomic_helper_plane_duplicate_state(plane, &dm_plane_state->base);
+
+	if (old_dm_plane_state->dc_state) {
+		dm_plane_state->dc_state = old_dm_plane_state->dc_state;
+		dc_plane_state_retain(dm_plane_state->dc_state);
+	}
+
+#ifdef CONFIG_DRM_AMD_DC_HDR
+	/*
+	 * dm_plane_state was just zero-allocated, so the blob pointers must be
+	 * taken from the old state before a reference can be acquired on them.
+	 */
+	dm_plane_state->degamma_lut = old_dm_plane_state->degamma_lut;
+	if (dm_plane_state->degamma_lut)
+		drm_property_blob_get(dm_plane_state->degamma_lut);
+	dm_plane_state->ctm = old_dm_plane_state->ctm;
+	if (dm_plane_state->ctm)
+		drm_property_blob_get(dm_plane_state->ctm);
+
+	dm_plane_state->sdr_boost = old_dm_plane_state->sdr_boost;
+#endif
+
+	return &dm_plane_state->base;
+}
+
+static
bool dm_plane_format_mod_supported(struct drm_plane *plane, + uint32_t format, + uint64_t modifier) +{ + struct amdgpu_device *adev = drm_to_adev(plane->dev); + const struct drm_format_info *info = drm_format_info(format); + struct hw_asic_id asic_id = adev->dm.dc->ctx->asic_id; + + enum dm_micro_swizzle microtile = modifier_gfx9_swizzle_mode(modifier) & 3; + + if (!info) + return false; + + /* + * We always have to allow these modifiers: + * 1. Core DRM checks for LINEAR support if userspace does not provide modifiers. + * 2. Not passing any modifiers is the same as explicitly passing INVALID. + */ + if (modifier == DRM_FORMAT_MOD_LINEAR || + modifier == DRM_FORMAT_MOD_INVALID) { + return true; + } + + /* check if swizzle mode is supported by this version of DCN */ + switch (asic_id.chip_family) { + case FAMILY_SI: + case FAMILY_CI: + case FAMILY_KV: + case FAMILY_CZ: + case FAMILY_VI: + /* asics before AI does not have modifier support */ + return false; + case FAMILY_AI: + case FAMILY_RV: + case FAMILY_NV: + case FAMILY_VGH: + case FAMILY_YELLOW_CARP: + case AMDGPU_FAMILY_GC_10_3_6: + case AMDGPU_FAMILY_GC_10_3_7: + switch (AMD_FMT_MOD_GET(TILE, modifier)) { + case AMD_FMT_MOD_TILE_GFX9_64K_R_X: + case AMD_FMT_MOD_TILE_GFX9_64K_D_X: + case AMD_FMT_MOD_TILE_GFX9_64K_S_X: + case AMD_FMT_MOD_TILE_GFX9_64K_D: + return true; + default: + return false; + } + break; + case AMDGPU_FAMILY_GC_11_0_0: + switch (AMD_FMT_MOD_GET(TILE, modifier)) { + case AMD_FMT_MOD_TILE_GFX11_256K_R_X: + case AMD_FMT_MOD_TILE_GFX9_64K_R_X: + case AMD_FMT_MOD_TILE_GFX9_64K_D_X: + case AMD_FMT_MOD_TILE_GFX9_64K_S_X: + case AMD_FMT_MOD_TILE_GFX9_64K_D: + return true; + default: + return false; + } + break; + default: + ASSERT(0); /* Unknown asic */ + break; + } + + /* + * For D swizzle the canonical modifier depends on the bpp, so check + * it here. 
+	 */
+	if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) == AMD_FMT_MOD_TILE_VER_GFX9 &&
+	    adev->family >= AMDGPU_FAMILY_NV) {
+		if (microtile == MICRO_SWIZZLE_D && info->cpp[0] == 4)
+			return false;
+	}
+
+	if (adev->family >= AMDGPU_FAMILY_RV && microtile == MICRO_SWIZZLE_D &&
+	    info->cpp[0] < 8)
+		return false;
+
+	if (modifier_has_dcc(modifier)) {
+		/* Per radeonsi comments 16/64 bpp are more complicated. */
+		if (info->cpp[0] != 4)
+			return false;
+		/* We support multi-planar formats, but not when combined with
+		 * additional DCC metadata planes.
+		 */
+		if (info->num_planes > 1)
+			return false;
+	}
+
+	return true;
+}
+
+static void dm_drm_plane_destroy_state(struct drm_plane *plane,
+				struct drm_plane_state *state)
+{
+	struct dm_plane_state *dm_plane_state = to_dm_plane_state(state);
+
+#ifdef CONFIG_DRM_AMD_DC_HDR
+	drm_property_blob_put(dm_plane_state->degamma_lut);
+	drm_property_blob_put(dm_plane_state->ctm);
+#endif
+	if (dm_plane_state->dc_state)
+		dc_plane_state_release(dm_plane_state->dc_state);
+
+	drm_atomic_helper_plane_destroy_state(plane, state);
+}
+
+#ifdef CONFIG_DRM_AMD_DC_HDR
+/* copied from drm_atomic_uapi.c */
+static int atomic_replace_property_blob_from_id(struct drm_device *dev,
+					 struct drm_property_blob **blob,
+					 uint64_t blob_id,
+					 ssize_t expected_size,
+					 ssize_t expected_elem_size,
+					 bool *replaced)
+{
+	struct drm_property_blob *new_blob = NULL;
+
+	if (blob_id != 0) {
+		new_blob = drm_property_lookup_blob(dev, blob_id);
+		if (new_blob == NULL)
+			return -EINVAL;
+
+		if (expected_size > 0 &&
+		    new_blob->length != expected_size) {
+			drm_property_blob_put(new_blob);
+			return -EINVAL;
+		}
+		if (expected_elem_size > 0 &&
+		    new_blob->length % expected_elem_size != 0) {
+			drm_property_blob_put(new_blob);
+			return -EINVAL;
+		}
+	}
+
+	*replaced |= drm_property_replace_blob(blob, new_blob);
+	drm_property_blob_put(new_blob);
+
+	return 0;
+}
+
+/*
+ * Store a driver-private plane property (degamma LUT blob, CTM blob or SDR
+ * boost) into @state.  @val is a blob id for the two blob properties and the
+ * raw value for SDR boost.
+ *
+ * Returns 0 on success, or -EINVAL for an unknown property or a blob that
+ * fails the lookup/size checks.
+ */
+int dm_drm_plane_set_property(struct drm_plane *plane,
+			      struct drm_plane_state *state,
+			      struct drm_property *property,
+			      uint64_t val)
+{
+	struct amdgpu_device *adev = drm_to_adev(plane->dev);
+	struct dm_plane_state *dm_plane_state = to_dm_plane_state(state);
+	int ret = 0;
+	/* The helper accumulates into this with |=, so it must start defined. */
+	bool replaced = false;
+
+	if (property == adev->dm.degamma_lut_property) {
+		ret = atomic_replace_property_blob_from_id(adev_to_drm(adev),
+				&dm_plane_state->degamma_lut,
+				val, -1, sizeof(struct drm_color_lut),
+				&replaced);
+	} else if (property == adev->dm.ctm_property) {
+		ret = atomic_replace_property_blob_from_id(adev_to_drm(adev),
+				&dm_plane_state->ctm,
+				val,
+				sizeof(struct drm_color_ctm), -1,
+				&replaced);
+	} else if (property == adev->dm.sdr_boost_property) {
+		dm_plane_state->sdr_boost = val;
+	} else {
+		return -EINVAL;
+	}
+
+	return ret;
+}
+
+int dm_drm_plane_get_property(struct drm_plane *plane,
+			      const struct drm_plane_state *state,
+			      struct drm_property *property,
+			      uint64_t *val)
+{
+	struct dm_plane_state *dm_plane_state = to_dm_plane_state(state);
+	struct amdgpu_device *adev = drm_to_adev(plane->dev);
+
+	if (property == adev->dm.degamma_lut_property) {
+		*val = (dm_plane_state->degamma_lut) ?
+			dm_plane_state->degamma_lut->base.id : 0;
+	} else if (property == adev->dm.ctm_property) {
+		*val = (dm_plane_state->ctm) ?
dm_plane_state->ctm->base.id : 0; + } else if (property == adev->dm.sdr_boost_property) { + *val = dm_plane_state->sdr_boost; + } else { + return -EINVAL; + } + + return 0; +} +#endif + +static const struct drm_plane_funcs dm_plane_funcs = { + .update_plane = drm_atomic_helper_update_plane, + .disable_plane = drm_atomic_helper_disable_plane, + .destroy = drm_primary_helper_destroy, + .reset = dm_drm_plane_reset, + .atomic_duplicate_state = dm_drm_plane_duplicate_state, + .atomic_destroy_state = dm_drm_plane_destroy_state, + .format_mod_supported = dm_plane_format_mod_supported, +#ifdef CONFIG_DRM_AMD_DC_HDR + .atomic_set_property = dm_drm_plane_set_property, + .atomic_get_property = dm_drm_plane_get_property, +#endif +}; + +int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, + struct drm_plane *plane, + unsigned long possible_crtcs, + const struct dc_plane_cap *plane_cap) +{ + uint32_t formats[32]; + int num_formats; + int res = -EPERM; + unsigned int supported_rotations; + uint64_t *modifiers = NULL; + + num_formats = get_plane_formats(plane, plane_cap, formats, + ARRAY_SIZE(formats)); + + res = get_plane_modifiers(dm->adev, plane->type, &modifiers); + if (res) + return res; + + res = drm_universal_plane_init(adev_to_drm(dm->adev), plane, possible_crtcs, + &dm_plane_funcs, formats, num_formats, + modifiers, plane->type, NULL); + kfree(modifiers); + if (res) + return res; + + if (plane->type == DRM_PLANE_TYPE_OVERLAY && + plane_cap && plane_cap->per_pixel_alpha) { + unsigned int blend_caps = BIT(DRM_MODE_BLEND_PIXEL_NONE) | + BIT(DRM_MODE_BLEND_PREMULTI) | + BIT(DRM_MODE_BLEND_COVERAGE); + + drm_plane_create_alpha_property(plane); + drm_plane_create_blend_mode_property(plane, blend_caps); + } + + if (plane->type == DRM_PLANE_TYPE_PRIMARY && + plane_cap && + (plane_cap->pixel_format_support.nv12 || + plane_cap->pixel_format_support.p010)) { + /* This only affects YUV formats. 
*/ + drm_plane_create_color_properties( + plane, + BIT(DRM_COLOR_YCBCR_BT601) | + BIT(DRM_COLOR_YCBCR_BT709) | + BIT(DRM_COLOR_YCBCR_BT2020), + BIT(DRM_COLOR_YCBCR_LIMITED_RANGE) | + BIT(DRM_COLOR_YCBCR_FULL_RANGE), + DRM_COLOR_YCBCR_BT709, DRM_COLOR_YCBCR_LIMITED_RANGE); + } + + supported_rotations = + DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_90 | + DRM_MODE_ROTATE_180 | DRM_MODE_ROTATE_270; + + if (dm->adev->asic_type >= CHIP_BONAIRE && + plane->type != DRM_PLANE_TYPE_CURSOR) + drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0, + supported_rotations); + + drm_plane_helper_add(plane, &dm_plane_helper_funcs); + +#ifdef CONFIG_DRM_AMD_DC_HDR + attach_color_mgmt_properties(dm, plane); +#endif + /* Create (reset) the plane state */ + if (plane->funcs->reset) + plane->funcs->reset(plane); + + return 0; +} + diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h new file mode 100644 index 000000000000..95168c2cfa6f --- /dev/null +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __AMDGPU_DM_PLANE_H__ +#define __AMDGPU_DM_PLANE_H__ + +#include "dc.h" + +void handle_cursor_update(struct drm_plane *plane, + struct drm_plane_state *old_plane_state); + +int fill_dc_scaling_info(struct amdgpu_device *adev, + const struct drm_plane_state *state, + struct dc_scaling_info *scaling_info); + +void get_min_max_dc_plane_scaling(struct drm_device *dev, + struct drm_framebuffer *fb, + int *min_downscale, int *max_upscale); + +int dm_plane_helper_check_state(struct drm_plane_state *state, + struct drm_crtc_state *new_crtc_state); + +bool modifier_has_dcc(uint64_t modifier); + +unsigned int modifier_gfx9_swizzle_mode(uint64_t modifier); + +int fill_plane_buffer_attributes(struct amdgpu_device *adev, + const struct amdgpu_framebuffer *afb, + const enum surface_pixel_format format, + const enum dc_rotation_angle rotation, + const uint64_t tiling_flags, + union dc_tiling_info *tiling_info, + struct plane_size *plane_size, + struct dc_plane_dcc_param *dcc, + struct dc_plane_address *address, + bool tmz_surface, + bool force_disable_dcc); + +int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, + struct drm_plane *plane, + unsigned long possible_crtcs, + const struct dc_plane_cap *plane_cap); + +const struct drm_format_info *amd_get_format_info(const struct drm_mode_fb_cmd2 *cmd); + +void fill_blending_from_plane_state(const struct drm_plane_state *plane_state, + bool *per_pixel_alpha, bool *pre_multiplied_alpha, + bool *global_alpha, int *global_alpha_value); + +#endif -- cgit v1.2.3 From 473683a03495a5ed9c75a02c7497f32d6cf92c7a Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Wed, 20 Jul 2022 15:43:21 -0400 Subject: drm/amd/display: 
Create a file dedicated for CRTC [Why] The amdgpu_dm file contains most of the code that works as an interface between DRM API and DC. As a result, this file becomes very large since it comprises multiple abstractions such as CRTC manipulation. [How] This commit extracts the CRTC code to its specific file named amdgpu_dm_crtc. This change does not change anything inside the functions; the only exception is converting some static functions to a global function. v2: fix ifdef merge mix up (Alex) Tested-by: Daniel Wheeler Reviewed-by: Harry Wentland Acked-by: Alan Liu Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/Makefile | 1 + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 434 +------------------ .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 463 +++++++++++++++++++++ .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h | 51 +++ 4 files changed, 516 insertions(+), 433 deletions(-) create mode 100644 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c create mode 100644 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile index ec559ea902a3..90fb0f3cdb6f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile @@ -28,6 +28,7 @@ AMDGPUDM = \ amdgpu_dm.o \ amdgpu_dm_plane.o \ + amdgpu_dm_crtc.o \ amdgpu_dm_irq.o \ amdgpu_dm_mst_types.o \ amdgpu_dm_color.o diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 8a1821a2551c..4e51b06fcdd9 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -47,6 +47,7 @@ #include "atom.h" #include "amdgpu_dm.h" #include "amdgpu_dm_plane.h" +#include "amdgpu_dm_crtc.h" #ifdef CONFIG_DRM_AMD_DC_HDCP #include "amdgpu_dm_hdcp.h" #include @@ -207,9 +208,6 @@ static int 
amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev); /* removes and deallocates the drm structures, created by the above function */ static void amdgpu_dm_destroy_drm_device(struct amdgpu_display_manager *dm); -static int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, - struct drm_plane *plane, - uint32_t link_index); static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm, struct amdgpu_dm_connector *amdgpu_dm_connector, uint32_t link_index, @@ -338,20 +336,6 @@ get_crtc_by_otg_inst(struct amdgpu_device *adev, return NULL; } -static inline bool amdgpu_dm_vrr_active_irq(struct amdgpu_crtc *acrtc) -{ - return acrtc->dm_irq_params.freesync_config.state == - VRR_STATE_ACTIVE_VARIABLE || - acrtc->dm_irq_params.freesync_config.state == - VRR_STATE_ACTIVE_FIXED; -} - -static inline bool amdgpu_dm_vrr_active(struct dm_crtc_state *dm_state) -{ - return dm_state->freesync_config.state == VRR_STATE_ACTIVE_VARIABLE || - dm_state->freesync_config.state == VRR_STATE_ACTIVE_FIXED; -} - static inline bool is_dc_timing_adjust_needed(struct dm_crtc_state *old_state, struct dm_crtc_state *new_state) { @@ -467,26 +451,6 @@ static void dm_pflip_high_irq(void *interrupt_params) vrr_active, (int) !e); } -static void dm_crtc_handle_vblank(struct amdgpu_crtc *acrtc) -{ - struct drm_crtc *crtc = &acrtc->base; - struct drm_device *dev = crtc->dev; - unsigned long flags; - - drm_crtc_handle_vblank(crtc); - - spin_lock_irqsave(&dev->event_lock, flags); - - /* Send completion event for cursor-only commits */ - if (acrtc->event && acrtc->pflip_status != AMDGPU_FLIP_SUBMITTED) { - drm_crtc_send_vblank_event(crtc, acrtc->event); - drm_crtc_vblank_put(crtc); - acrtc->event = NULL; - } - - spin_unlock_irqrestore(&dev->event_lock, flags); -} - static void dm_vupdate_high_irq(void *interrupt_params) { struct common_irq_params *irq_params = interrupt_params; @@ -1264,52 +1228,6 @@ static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_ } -static 
void vblank_control_worker(struct work_struct *work) -{ - struct vblank_control_work *vblank_work = - container_of(work, struct vblank_control_work, work); - struct amdgpu_display_manager *dm = vblank_work->dm; - - mutex_lock(&dm->dc_lock); - - if (vblank_work->enable) - dm->active_vblank_irq_count++; - else if(dm->active_vblank_irq_count) - dm->active_vblank_irq_count--; - - dc_allow_idle_optimizations(dm->dc, dm->active_vblank_irq_count == 0); - - DRM_DEBUG_KMS("Allow idle optimizations (MALL): %d\n", dm->active_vblank_irq_count == 0); - - /* - * Control PSR based on vblank requirements from OS - * - * If panel supports PSR SU, there's no need to disable PSR when OS is - * submitting fast atomic commits (we infer this by whether the OS - * requests vblank events). Fast atomic commits will simply trigger a - * full-frame-update (FFU); a specific case of selective-update (SU) - * where the SU region is the full hactive*vactive region. See - * fill_dc_dirty_rects(). - */ - if (vblank_work->stream && vblank_work->stream->link) { - if (vblank_work->enable) { - if (vblank_work->stream->link->psr_settings.psr_version < DC_PSR_VERSION_SU_1 && - vblank_work->stream->link->psr_settings.psr_allow_active) - amdgpu_dm_psr_disable(vblank_work->stream); - } else if (vblank_work->stream->link->psr_settings.psr_feature_enabled && - !vblank_work->stream->link->psr_settings.psr_allow_active && - vblank_work->acrtc->dm_irq_params.allow_psr_entry) { - amdgpu_dm_psr_enable(vblank_work->stream); - } - } - - mutex_unlock(&dm->dc_lock); - - dc_stream_release(vblank_work->stream); - - kfree(vblank_work); -} - static void dm_handle_hpd_rx_offload_work(struct work_struct *work) { struct hpd_rx_irq_offload_work *offload_work; @@ -2385,9 +2303,6 @@ static int dm_hw_fini(void *handle) } -static int dm_enable_vblank(struct drm_crtc *crtc); -static void dm_disable_vblank(struct drm_crtc *crtc); - static void dm_gpureset_toggle_interrupts(struct amdgpu_device *adev, struct dc_state *state, bool 
enable) { @@ -4679,13 +4594,6 @@ static int dm_early_init(void *handle) return 0; } -static bool modeset_required(struct drm_crtc_state *crtc_state, - struct dc_stream_state *new_stream, - struct dc_stream_state *old_stream) -{ - return crtc_state->active && drm_atomic_crtc_needs_modeset(crtc_state); -} - static bool modereset_required(struct drm_crtc_state *crtc_state) { return !crtc_state->active && drm_atomic_crtc_needs_modeset(crtc_state); @@ -5916,182 +5824,6 @@ finish: return stream; } -static void amdgpu_dm_crtc_destroy(struct drm_crtc *crtc) -{ - drm_crtc_cleanup(crtc); - kfree(crtc); -} - -static void dm_crtc_destroy_state(struct drm_crtc *crtc, - struct drm_crtc_state *state) -{ - struct dm_crtc_state *cur = to_dm_crtc_state(state); - - /* TODO Destroy dc_stream objects are stream object is flattened */ - if (cur->stream) - dc_stream_release(cur->stream); - - - __drm_atomic_helper_crtc_destroy_state(state); - - - kfree(state); -} - -static void dm_crtc_reset_state(struct drm_crtc *crtc) -{ - struct dm_crtc_state *state; - - if (crtc->state) - dm_crtc_destroy_state(crtc, crtc->state); - - state = kzalloc(sizeof(*state), GFP_KERNEL); - if (WARN_ON(!state)) - return; - - __drm_atomic_helper_crtc_reset(crtc, &state->base); -} - -static struct drm_crtc_state * -dm_crtc_duplicate_state(struct drm_crtc *crtc) -{ - struct dm_crtc_state *state, *cur; - - cur = to_dm_crtc_state(crtc->state); - - if (WARN_ON(!crtc->state)) - return NULL; - - state = kzalloc(sizeof(*state), GFP_KERNEL); - if (!state) - return NULL; - - __drm_atomic_helper_crtc_duplicate_state(crtc, &state->base); - - if (cur->stream) { - state->stream = cur->stream; - dc_stream_retain(state->stream); - } - - state->active_planes = cur->active_planes; - state->vrr_infopacket = cur->vrr_infopacket; - state->abm_level = cur->abm_level; - state->vrr_supported = cur->vrr_supported; - state->freesync_config = cur->freesync_config; - state->cm_has_degamma = cur->cm_has_degamma; - state->cm_is_degamma_srgb = 
cur->cm_is_degamma_srgb; - state->mpo_requested = cur->mpo_requested; - /* TODO Duplicate dc_stream after objects are stream object is flattened */ - - return &state->base; -} - -#ifdef CONFIG_DEBUG_FS -static int amdgpu_dm_crtc_late_register(struct drm_crtc *crtc) -{ - crtc_debugfs_init(crtc); - - return 0; -} -#endif - -static inline int dm_set_vupdate_irq(struct drm_crtc *crtc, bool enable) -{ - enum dc_irq_source irq_source; - struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); - struct amdgpu_device *adev = drm_to_adev(crtc->dev); - int rc; - - irq_source = IRQ_TYPE_VUPDATE + acrtc->otg_inst; - - rc = dc_interrupt_set(adev->dm.dc, irq_source, enable) ? 0 : -EBUSY; - - DRM_DEBUG_VBL("crtc %d - vupdate irq %sabling: r=%d\n", - acrtc->crtc_id, enable ? "en" : "dis", rc); - return rc; -} - -static inline int dm_set_vblank(struct drm_crtc *crtc, bool enable) -{ - enum dc_irq_source irq_source; - struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); - struct amdgpu_device *adev = drm_to_adev(crtc->dev); - struct dm_crtc_state *acrtc_state = to_dm_crtc_state(crtc->state); - struct amdgpu_display_manager *dm = &adev->dm; - struct vblank_control_work *work; - int rc = 0; - - if (enable) { - /* vblank irq on -> Only need vupdate irq in vrr mode */ - if (amdgpu_dm_vrr_active(acrtc_state)) - rc = dm_set_vupdate_irq(crtc, true); - } else { - /* vblank irq off -> vupdate irq off */ - rc = dm_set_vupdate_irq(crtc, false); - } - - if (rc) - return rc; - - irq_source = IRQ_TYPE_VBLANK + acrtc->otg_inst; - - if (!dc_interrupt_set(adev->dm.dc, irq_source, enable)) - return -EBUSY; - - if (amdgpu_in_reset(adev)) - return 0; - - if (dm->vblank_control_workqueue) { - work = kzalloc(sizeof(*work), GFP_ATOMIC); - if (!work) - return -ENOMEM; - - INIT_WORK(&work->work, vblank_control_worker); - work->dm = dm; - work->acrtc = acrtc; - work->enable = enable; - - if (acrtc_state->stream) { - dc_stream_retain(acrtc_state->stream); - work->stream = acrtc_state->stream; - } - - 
queue_work(dm->vblank_control_workqueue, &work->work); - } - - return 0; -} - -static int dm_enable_vblank(struct drm_crtc *crtc) -{ - return dm_set_vblank(crtc, true); -} - -static void dm_disable_vblank(struct drm_crtc *crtc) -{ - dm_set_vblank(crtc, false); -} - -/* Implemented only the options currently available for the driver */ -static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = { - .reset = dm_crtc_reset_state, - .destroy = amdgpu_dm_crtc_destroy, - .set_config = drm_atomic_helper_set_config, - .page_flip = drm_atomic_helper_page_flip, - .atomic_duplicate_state = dm_crtc_duplicate_state, - .atomic_destroy_state = dm_crtc_destroy_state, - .set_crc_source = amdgpu_dm_crtc_set_crc_source, - .verify_crc_source = amdgpu_dm_crtc_verify_crc_source, - .get_crc_sources = amdgpu_dm_crtc_get_crc_sources, - .get_vblank_counter = amdgpu_get_vblank_counter_kms, - .enable_vblank = dm_enable_vblank, - .disable_vblank = dm_disable_vblank, - .get_vblank_timestamp = drm_crtc_vblank_helper_get_vblank_timestamp, -#if defined(CONFIG_DEBUG_FS) - .late_register = amdgpu_dm_crtc_late_register, -#endif -}; - static enum drm_connector_status amdgpu_dm_connector_detect(struct drm_connector *connector, bool force) { @@ -6627,113 +6359,6 @@ amdgpu_dm_connector_helper_funcs = { .atomic_check = amdgpu_dm_connector_atomic_check, }; -static void dm_crtc_helper_disable(struct drm_crtc *crtc) -{ -} - -static int count_crtc_active_planes(struct drm_crtc_state *new_crtc_state) -{ - struct drm_atomic_state *state = new_crtc_state->state; - struct drm_plane *plane; - int num_active = 0; - - drm_for_each_plane_mask(plane, state->dev, new_crtc_state->plane_mask) { - struct drm_plane_state *new_plane_state; - - /* Cursor planes are "fake". */ - if (plane->type == DRM_PLANE_TYPE_CURSOR) - continue; - - new_plane_state = drm_atomic_get_new_plane_state(state, plane); - - if (!new_plane_state) { - /* - * The plane is enable on the CRTC and hasn't changed - * state. 
This means that it previously passed - * validation and is therefore enabled. - */ - num_active += 1; - continue; - } - - /* We need a framebuffer to be considered enabled. */ - num_active += (new_plane_state->fb != NULL); - } - - return num_active; -} - -static void dm_update_crtc_active_planes(struct drm_crtc *crtc, - struct drm_crtc_state *new_crtc_state) -{ - struct dm_crtc_state *dm_new_crtc_state = - to_dm_crtc_state(new_crtc_state); - - dm_new_crtc_state->active_planes = 0; - - if (!dm_new_crtc_state->stream) - return; - - dm_new_crtc_state->active_planes = - count_crtc_active_planes(new_crtc_state); -} - -static int dm_crtc_helper_atomic_check(struct drm_crtc *crtc, - struct drm_atomic_state *state) -{ - struct drm_crtc_state *crtc_state = drm_atomic_get_new_crtc_state(state, - crtc); - struct amdgpu_device *adev = drm_to_adev(crtc->dev); - struct dc *dc = adev->dm.dc; - struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(crtc_state); - int ret = -EINVAL; - - trace_amdgpu_dm_crtc_atomic_check(crtc_state); - - dm_update_crtc_active_planes(crtc, crtc_state); - - if (WARN_ON(unlikely(!dm_crtc_state->stream && - modeset_required(crtc_state, NULL, dm_crtc_state->stream)))) { - return ret; - } - - /* - * We require the primary plane to be enabled whenever the CRTC is, otherwise - * drm_mode_cursor_universal may end up trying to enable the cursor plane while all other - * planes are disabled, which is not supported by the hardware. And there is legacy - * userspace which stops using the HW cursor altogether in response to the resulting EINVAL. 
- */ - if (crtc_state->enable && - !(crtc_state->plane_mask & drm_plane_mask(crtc->primary))) { - DRM_DEBUG_ATOMIC("Can't enable a CRTC without enabling the primary plane\n"); - return -EINVAL; - } - - /* In some use cases, like reset, no stream is attached */ - if (!dm_crtc_state->stream) - return 0; - - if (dc_validate_stream(dc, dm_crtc_state->stream) == DC_OK) - return 0; - - DRM_DEBUG_ATOMIC("Failed DC stream validation\n"); - return ret; -} - -static bool dm_crtc_helper_mode_fixup(struct drm_crtc *crtc, - const struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode) -{ - return true; -} - -static const struct drm_crtc_helper_funcs amdgpu_dm_crtc_helper_funcs = { - .disable = dm_crtc_helper_disable, - .atomic_check = dm_crtc_helper_atomic_check, - .mode_fixup = dm_crtc_helper_mode_fixup, - .get_scanout_position = amdgpu_crtc_get_scanout_position, -}; - static void dm_encoder_helper_disable(struct drm_encoder *encoder) { @@ -6891,63 +6516,6 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state, } #endif -static int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, - struct drm_plane *plane, - uint32_t crtc_index) -{ - struct amdgpu_crtc *acrtc = NULL; - struct drm_plane *cursor_plane; - - int res = -ENOMEM; - - cursor_plane = kzalloc(sizeof(*cursor_plane), GFP_KERNEL); - if (!cursor_plane) - goto fail; - - cursor_plane->type = DRM_PLANE_TYPE_CURSOR; - res = amdgpu_dm_plane_init(dm, cursor_plane, 0, NULL); - - acrtc = kzalloc(sizeof(struct amdgpu_crtc), GFP_KERNEL); - if (!acrtc) - goto fail; - - res = drm_crtc_init_with_planes( - dm->ddev, - &acrtc->base, - plane, - cursor_plane, - &amdgpu_dm_crtc_funcs, NULL); - - if (res) - goto fail; - - drm_crtc_helper_add(&acrtc->base, &amdgpu_dm_crtc_helper_funcs); - - /* Create (reset) the plane state */ - if (acrtc->base.funcs->reset) - acrtc->base.funcs->reset(&acrtc->base); - - acrtc->max_cursor_width = dm->adev->dm.dc->caps.max_cursor_size; - acrtc->max_cursor_height = 
dm->adev->dm.dc->caps.max_cursor_size; - - acrtc->crtc_id = crtc_index; - acrtc->base.enabled = false; - acrtc->otg_inst = -1; - - dm->adev->mode_info.crtcs[crtc_index] = acrtc; - drm_crtc_enable_color_mgmt(&acrtc->base, MAX_COLOR_LUT_ENTRIES, - true, MAX_COLOR_LUT_ENTRIES); - drm_mode_crtc_set_gamma_size(&acrtc->base, MAX_COLOR_LEGACY_LUT_ENTRIES); - - return 0; - -fail: - kfree(acrtc); - kfree(cursor_plane); - return res; -} - - static int to_drm_connector_type(enum signal_type st) { switch (st) { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c new file mode 100644 index 000000000000..828189cb3441 --- /dev/null +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -0,0 +1,463 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ +#include <drm/drm_vblank.h> +#include <drm/drm_atomic_helper.h> + +#include "dc.h" +#include "amdgpu.h" +#include "amdgpu_dm_psr.h" +#include "amdgpu_dm_crtc.h" +#include "amdgpu_dm_plane.h" +#include "amdgpu_dm_trace.h" +#include "amdgpu_dm_debugfs.h" + +void dm_crtc_handle_vblank(struct amdgpu_crtc *acrtc) +{ + struct drm_crtc *crtc = &acrtc->base; + struct drm_device *dev = crtc->dev; + unsigned long flags; + + drm_crtc_handle_vblank(crtc); + + spin_lock_irqsave(&dev->event_lock, flags); + + /* Send completion event for cursor-only commits */ + if (acrtc->event && acrtc->pflip_status != AMDGPU_FLIP_SUBMITTED) { + drm_crtc_send_vblank_event(crtc, acrtc->event); + drm_crtc_vblank_put(crtc); + acrtc->event = NULL; + } + + spin_unlock_irqrestore(&dev->event_lock, flags); +} + +bool modeset_required(struct drm_crtc_state *crtc_state, + struct dc_stream_state *new_stream, + struct dc_stream_state *old_stream) +{ + return crtc_state->active && drm_atomic_crtc_needs_modeset(crtc_state); +} + +bool amdgpu_dm_vrr_active_irq(struct amdgpu_crtc *acrtc) + +{ + return acrtc->dm_irq_params.freesync_config.state == + VRR_STATE_ACTIVE_VARIABLE || + acrtc->dm_irq_params.freesync_config.state == + VRR_STATE_ACTIVE_FIXED; +} + +int dm_set_vupdate_irq(struct drm_crtc *crtc, bool enable) +{ + enum dc_irq_source irq_source; + struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); + struct amdgpu_device *adev = drm_to_adev(crtc->dev); + int rc; + + irq_source = IRQ_TYPE_VUPDATE + acrtc->otg_inst; + + rc = dc_interrupt_set(adev->dm.dc, irq_source, enable) ? 0 : -EBUSY; + + DRM_DEBUG_VBL("crtc %d - vupdate irq %sabling: r=%d\n", + acrtc->crtc_id, enable ?
"en" : "dis", rc); + return rc; +} + +bool amdgpu_dm_vrr_active(struct dm_crtc_state *dm_state) +{ + return dm_state->freesync_config.state == VRR_STATE_ACTIVE_VARIABLE || + dm_state->freesync_config.state == VRR_STATE_ACTIVE_FIXED; +} + +static void vblank_control_worker(struct work_struct *work) +{ + struct vblank_control_work *vblank_work = + container_of(work, struct vblank_control_work, work); + struct amdgpu_display_manager *dm = vblank_work->dm; + + mutex_lock(&dm->dc_lock); + + if (vblank_work->enable) + dm->active_vblank_irq_count++; + else if (dm->active_vblank_irq_count) + dm->active_vblank_irq_count--; + + dc_allow_idle_optimizations( + dm->dc, dm->active_vblank_irq_count == 0 ? true : false); + + DRM_DEBUG_KMS("Allow idle optimizations (MALL): %d\n", dm->active_vblank_irq_count == 0); + + /* + * Control PSR based on vblank requirements from OS + * + * If panel supports PSR SU, there's no need to disable PSR when OS is + * submitting fast atomic commits (we infer this by whether the OS + * requests vblank events). Fast atomic commits will simply trigger a + * full-frame-update (FFU); a specific case of selective-update (SU) + * where the SU region is the full hactive*vactive region. See + * fill_dc_dirty_rects(). 
+ */ + if (vblank_work->stream && vblank_work->stream->link) { + if (vblank_work->enable) { + if (vblank_work->stream->link->psr_settings.psr_version < DC_PSR_VERSION_SU_1 && + vblank_work->stream->link->psr_settings.psr_allow_active) + amdgpu_dm_psr_disable(vblank_work->stream); + } else if (vblank_work->stream->link->psr_settings.psr_feature_enabled && + !vblank_work->stream->link->psr_settings.psr_allow_active && + vblank_work->acrtc->dm_irq_params.allow_psr_entry) { + amdgpu_dm_psr_enable(vblank_work->stream); + } + } + + mutex_unlock(&dm->dc_lock); + + dc_stream_release(vblank_work->stream); + + kfree(vblank_work); +} + +static inline int dm_set_vblank(struct drm_crtc *crtc, bool enable) +{ + enum dc_irq_source irq_source; + struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); + struct amdgpu_device *adev = drm_to_adev(crtc->dev); + struct dm_crtc_state *acrtc_state = to_dm_crtc_state(crtc->state); + struct amdgpu_display_manager *dm = &adev->dm; + struct vblank_control_work *work; + int rc = 0; + + if (enable) { + /* vblank irq on -> Only need vupdate irq in vrr mode */ + if (amdgpu_dm_vrr_active(acrtc_state)) + rc = dm_set_vupdate_irq(crtc, true); + } else { + /* vblank irq off -> vupdate irq off */ + rc = dm_set_vupdate_irq(crtc, false); + } + + if (rc) + return rc; + + irq_source = IRQ_TYPE_VBLANK + acrtc->otg_inst; + + if (!dc_interrupt_set(adev->dm.dc, irq_source, enable)) + return -EBUSY; + + if (amdgpu_in_reset(adev)) + return 0; + + if (dm->vblank_control_workqueue) { + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) + return -ENOMEM; + + INIT_WORK(&work->work, vblank_control_worker); + work->dm = dm; + work->acrtc = acrtc; + work->enable = enable; + + if (acrtc_state->stream) { + dc_stream_retain(acrtc_state->stream); + work->stream = acrtc_state->stream; + } + + queue_work(dm->vblank_control_workqueue, &work->work); + } + + return 0; +} + +int dm_enable_vblank(struct drm_crtc *crtc) +{ + return dm_set_vblank(crtc, true); +} + +void 
dm_disable_vblank(struct drm_crtc *crtc) +{ + dm_set_vblank(crtc, false); +} + +static void dm_crtc_destroy_state(struct drm_crtc *crtc, + struct drm_crtc_state *state) +{ + struct dm_crtc_state *cur = to_dm_crtc_state(state); + + /* TODO Destroy dc_stream objects are stream object is flattened */ + if (cur->stream) + dc_stream_release(cur->stream); + + + __drm_atomic_helper_crtc_destroy_state(state); + + + kfree(state); +} + +static struct drm_crtc_state *dm_crtc_duplicate_state(struct drm_crtc *crtc) +{ + struct dm_crtc_state *state, *cur; + + cur = to_dm_crtc_state(crtc->state); + + if (WARN_ON(!crtc->state)) + return NULL; + + state = kzalloc(sizeof(*state), GFP_KERNEL); + if (!state) + return NULL; + + __drm_atomic_helper_crtc_duplicate_state(crtc, &state->base); + + if (cur->stream) { + state->stream = cur->stream; + dc_stream_retain(state->stream); + } + + state->active_planes = cur->active_planes; + state->vrr_infopacket = cur->vrr_infopacket; + state->abm_level = cur->abm_level; + state->vrr_supported = cur->vrr_supported; + state->freesync_config = cur->freesync_config; + state->cm_has_degamma = cur->cm_has_degamma; + state->cm_is_degamma_srgb = cur->cm_is_degamma_srgb; + state->mpo_requested = cur->mpo_requested; + /* TODO Duplicate dc_stream after objects are stream object is flattened */ + + return &state->base; +} + +static void amdgpu_dm_crtc_destroy(struct drm_crtc *crtc) +{ + drm_crtc_cleanup(crtc); + kfree(crtc); +} + +static void dm_crtc_reset_state(struct drm_crtc *crtc) +{ + struct dm_crtc_state *state; + + if (crtc->state) + dm_crtc_destroy_state(crtc, crtc->state); + + state = kzalloc(sizeof(*state), GFP_KERNEL); + if (WARN_ON(!state)) + return; + + __drm_atomic_helper_crtc_reset(crtc, &state->base); +} + +#ifdef CONFIG_DEBUG_FS +static int amdgpu_dm_crtc_late_register(struct drm_crtc *crtc) +{ + crtc_debugfs_init(crtc); + + return 0; +} +#endif + +/* Implemented only the options currently available for the driver */ +static const struct 
drm_crtc_funcs amdgpu_dm_crtc_funcs = { + .reset = dm_crtc_reset_state, + .destroy = amdgpu_dm_crtc_destroy, + .set_config = drm_atomic_helper_set_config, + .page_flip = drm_atomic_helper_page_flip, + .atomic_duplicate_state = dm_crtc_duplicate_state, + .atomic_destroy_state = dm_crtc_destroy_state, + .set_crc_source = amdgpu_dm_crtc_set_crc_source, + .verify_crc_source = amdgpu_dm_crtc_verify_crc_source, + .get_crc_sources = amdgpu_dm_crtc_get_crc_sources, + .get_vblank_counter = amdgpu_get_vblank_counter_kms, + .enable_vblank = dm_enable_vblank, + .disable_vblank = dm_disable_vblank, + .get_vblank_timestamp = drm_crtc_vblank_helper_get_vblank_timestamp, +#if defined(CONFIG_DEBUG_FS) + .late_register = amdgpu_dm_crtc_late_register, +#endif +}; + +static void dm_crtc_helper_disable(struct drm_crtc *crtc) +{ +} + +static int count_crtc_active_planes(struct drm_crtc_state *new_crtc_state) +{ + struct drm_atomic_state *state = new_crtc_state->state; + struct drm_plane *plane; + int num_active = 0; + + drm_for_each_plane_mask(plane, state->dev, new_crtc_state->plane_mask) { + struct drm_plane_state *new_plane_state; + + /* Cursor planes are "fake". */ + if (plane->type == DRM_PLANE_TYPE_CURSOR) + continue; + + new_plane_state = drm_atomic_get_new_plane_state(state, plane); + + if (!new_plane_state) { + /* + * The plane is enable on the CRTC and hasn't changed + * state. This means that it previously passed + * validation and is therefore enabled. + */ + num_active += 1; + continue; + } + + /* We need a framebuffer to be considered enabled. 
*/ + num_active += (new_plane_state->fb != NULL); + } + + return num_active; +} + +static void dm_update_crtc_active_planes(struct drm_crtc *crtc, + struct drm_crtc_state *new_crtc_state) +{ + struct dm_crtc_state *dm_new_crtc_state = + to_dm_crtc_state(new_crtc_state); + + dm_new_crtc_state->active_planes = 0; + + if (!dm_new_crtc_state->stream) + return; + + dm_new_crtc_state->active_planes = + count_crtc_active_planes(new_crtc_state); +} + +static bool dm_crtc_helper_mode_fixup(struct drm_crtc *crtc, + const struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode) +{ + return true; +} + +static int dm_crtc_helper_atomic_check(struct drm_crtc *crtc, + struct drm_atomic_state *state) +{ + struct drm_crtc_state *crtc_state = drm_atomic_get_new_crtc_state(state, + crtc); + struct amdgpu_device *adev = drm_to_adev(crtc->dev); + struct dc *dc = adev->dm.dc; + struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(crtc_state); + int ret = -EINVAL; + + trace_amdgpu_dm_crtc_atomic_check(crtc_state); + + dm_update_crtc_active_planes(crtc, crtc_state); + + if (WARN_ON(unlikely(!dm_crtc_state->stream && + modeset_required(crtc_state, NULL, dm_crtc_state->stream)))) { + return ret; + } + + /* + * We require the primary plane to be enabled whenever the CRTC is, otherwise + * drm_mode_cursor_universal may end up trying to enable the cursor plane while all other + * planes are disabled, which is not supported by the hardware. And there is legacy + * userspace which stops using the HW cursor altogether in response to the resulting EINVAL. 
+ */ + if (crtc_state->enable && + !(crtc_state->plane_mask & drm_plane_mask(crtc->primary))) { + DRM_DEBUG_ATOMIC("Can't enable a CRTC without enabling the primary plane\n"); + return -EINVAL; + } + + /* In some use cases, like reset, no stream is attached */ + if (!dm_crtc_state->stream) + return 0; + + if (dc_validate_stream(dc, dm_crtc_state->stream) == DC_OK) + return 0; + + DRM_DEBUG_ATOMIC("Failed DC stream validation\n"); + return ret; +} + +static const struct drm_crtc_helper_funcs amdgpu_dm_crtc_helper_funcs = { + .disable = dm_crtc_helper_disable, + .atomic_check = dm_crtc_helper_atomic_check, + .mode_fixup = dm_crtc_helper_mode_fixup, + .get_scanout_position = amdgpu_crtc_get_scanout_position, +}; + +int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, + struct drm_plane *plane, + uint32_t crtc_index) +{ + struct amdgpu_crtc *acrtc = NULL; + struct drm_plane *cursor_plane; + + int res = -ENOMEM; + + cursor_plane = kzalloc(sizeof(*cursor_plane), GFP_KERNEL); + if (!cursor_plane) + goto fail; + + cursor_plane->type = DRM_PLANE_TYPE_CURSOR; + res = amdgpu_dm_plane_init(dm, cursor_plane, 0, NULL); + + acrtc = kzalloc(sizeof(struct amdgpu_crtc), GFP_KERNEL); + if (!acrtc) + goto fail; + + res = drm_crtc_init_with_planes( + dm->ddev, + &acrtc->base, + plane, + cursor_plane, + &amdgpu_dm_crtc_funcs, NULL); + + if (res) + goto fail; + + drm_crtc_helper_add(&acrtc->base, &amdgpu_dm_crtc_helper_funcs); + + /* Create (reset) the plane state */ + if (acrtc->base.funcs->reset) + acrtc->base.funcs->reset(&acrtc->base); + + acrtc->max_cursor_width = dm->adev->dm.dc->caps.max_cursor_size; + acrtc->max_cursor_height = dm->adev->dm.dc->caps.max_cursor_size; + + acrtc->crtc_id = crtc_index; + acrtc->base.enabled = false; + acrtc->otg_inst = -1; + + dm->adev->mode_info.crtcs[crtc_index] = acrtc; + drm_crtc_enable_color_mgmt(&acrtc->base, MAX_COLOR_LUT_ENTRIES, + true, MAX_COLOR_LUT_ENTRIES); + drm_mode_crtc_set_gamma_size(&acrtc->base, MAX_COLOR_LEGACY_LUT_ENTRIES); 
+ + return 0; + +fail: + kfree(acrtc); + kfree(cursor_plane); + return res; +} + diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h new file mode 100644 index 000000000000..1ac8692354cf --- /dev/null +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#ifndef __AMDGPU_DM_CRTC_H__ +#define __AMDGPU_DM_CRTC_H__ + +void dm_crtc_handle_vblank(struct amdgpu_crtc *acrtc); + +bool modeset_required(struct drm_crtc_state *crtc_state, + struct dc_stream_state *new_stream, + struct dc_stream_state *old_stream); + +int dm_set_vupdate_irq(struct drm_crtc *crtc, bool enable); + +bool amdgpu_dm_vrr_active_irq(struct amdgpu_crtc *acrtc); + +bool amdgpu_dm_vrr_active(struct dm_crtc_state *dm_state); + +int dm_enable_vblank(struct drm_crtc *crtc); + +void dm_disable_vblank(struct drm_crtc *crtc); + +int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, + struct drm_plane *plane, + uint32_t link_index); + +#endif + -- cgit v1.2.3 From e6c64182ed946f1cb65799de9ee75f8bcc42b2c6 Mon Sep 17 00:00:00 2001 From: Wenjing Liu Date: Wed, 29 Jun 2022 14:55:43 -0400 Subject: drm/amd/display: remove number of DSC slices override in DML [why] Number of DSC slices is an input to DML with high dependency on display-specific capability. This isn't something DML can decide on its own. DML has to use the original number of DSC slices input to DML during validation without modification. Otherwise the computed DSC delay will not reflect the current configuration and will therefore cause validation failures. [how] Remove DML override for number of DSC slices parameter.
Tested-by: Daniel Wheeler Reviewed-by: Alvin Lee Acked-by: Alan Liu Signed-off-by: Wenjing Liu Signed-off-by: Alex Deucher --- .../amd/display/dc/dml/dcn32/display_mode_vba_32.c | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index 349e36ae9333..9944f58f0db9 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -1897,26 +1897,6 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l v->MaximumSwathWidthInLineBufferChroma); } - /*Number Of DSC Slices*/ - for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { - if (mode_lib->vba.BlendingAndTiming[k] == k) { - if (mode_lib->vba.PixelClockBackEnd[k] > 4800) { - mode_lib->vba.NumberOfDSCSlices[k] = dml_ceil(mode_lib->vba.PixelClockBackEnd[k] / 600, - 4); - } else if (mode_lib->vba.PixelClockBackEnd[k] > 2400) { - mode_lib->vba.NumberOfDSCSlices[k] = 8; - } else if (mode_lib->vba.PixelClockBackEnd[k] > 1200) { - mode_lib->vba.NumberOfDSCSlices[k] = 4; - } else if (mode_lib->vba.PixelClockBackEnd[k] > 340) { - mode_lib->vba.NumberOfDSCSlices[k] = 2; - } else { - mode_lib->vba.NumberOfDSCSlices[k] = 1; - } - } else { - mode_lib->vba.NumberOfDSCSlices[k] = 0; - } - } - dml32_CalculateSwathAndDETConfiguration( mode_lib->vba.DETSizeOverride, mode_lib->vba.UsesMALLForPStateChange, -- cgit v1.2.3 From 074293dd9f61f11898f1f6e01f1560fd4c474025 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Wed, 13 Jul 2022 13:17:41 -0400 Subject: drm/amd/display: Fix hard hang if DSC is disabled We want to calculate the DTB clock values when DSC is enabled; however, this is not the current behavior implemented in DCN32. Right now, DML is trying to calculate DSC values even if DSC is disabled; as a result, we can have a hard hang due to wrong clock calculation. 
This commit fixes this issue by moving the calculation after the DSC check. Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../display/dc/dml/dcn32/display_mode_vba_util_32.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index 5a701d9df0f7..febaff7d7343 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -1686,17 +1686,22 @@ double dml32_RequiredDTBCLK( unsigned int AudioRate, unsigned int AudioLayout) { - double PixelWordRate = PixelClock / (OutputFormat == dm_444 ? 1 : 2); - double HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp * - dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1); - double HCBlank = 64 + 32 * - dml_ceil(AudioRate * (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1); - double AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal; - double HActiveTribyteRate = PixelWordRate * HCActive / HActive; + double PixelWordRate; + double HCActive; + double HCBlank; + double AverageTribyteRate; + double HActiveTribyteRate; if (DSCEnable != true) return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0); + PixelWordRate = PixelClock / (OutputFormat == dm_444 ? 1 : 2); + HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp * + dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1); + HCBlank = 64 + 32 * + dml_ceil(AudioRate * (AudioLayout == 1 ? 
1 : 0.25) * HTotal / (PixelClock * 1000), 1); + AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal; + HActiveTribyteRate = PixelWordRate * HCActive / HActive; return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002; } -- cgit v1.2.3 From a983d263186996002ecafc72f9a0dc5a3a2bfd6f Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Thu, 16 Jun 2022 10:58:08 -0400 Subject: drm/amd/display: Don't set dram clock change requirement for SubVP [Description] In general cases we want to keep the dram clock change requirement (we prefer configs that support MCLK switch). Only override to false for SubVP. Tested-by: Daniel Wheeler Acked-by: Alan Liu Signed-off-by: Alvin Lee Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index f913daabcca5..92d87745d933 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -2984,6 +2984,7 @@ int dcn32_populate_dml_pipes_from_context( int i, pipe_cnt; struct resource_context *res_ctx = &context->res_ctx; struct pipe_ctx *pipe; + bool subvp_in_use = false; dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); @@ -3006,6 +3007,7 @@ int dcn32_populate_dml_pipes_from_context( switch (pipe->stream->mall_stream_config.type) { case SUBVP_MAIN: pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_sub_viewport; + subvp_in_use = true; break; case SUBVP_PHANTOM: pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_phantom_pipe; @@ -3076,6 +3078,14 @@ int dcn32_populate_dml_pipes_from_context( dcn32_update_det_override_for_mpo(dc, context, pipes); + // In general cases we want to keep the dram clock change requirement + // (prefer configs that 
support MCLK switch). Only override to false + // for SubVP + if (subvp_in_use) + context->bw_ctx.dml.soc.dram_clock_change_requirement_final = false; + else + context->bw_ctx.dml.soc.dram_clock_change_requirement_final = true; + return pipe_cnt; } -- cgit v1.2.3 From 44b0c964201e1e2fbfcdab4472f9260048befbaa Mon Sep 17 00:00:00 2001 From: Taimur Hassan Date: Tue, 5 Jul 2022 12:22:09 -0400 Subject: drm/amd/display: Update de-tile override to anticipate pipe splitting [Why] For certain MPO configurations, DML will split a pipe after DET buffer has already been allocated by driver, resulting in allocation of more DET segments than the configurable return buffer has, causing underflow. [How] Determine during DET override calculation whether or not a pipe will be split later on by DML, and distribute DET segments based on expected number of pipes. Tested-by: Daniel Wheeler Reviewed-by: Dmytro Laktyushkin Acked-by: Alan Liu Signed-off-by: Taimur Hassan Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 22 ++-- .../gpu/drm/amd/display/dc/dcn32/dcn32_resource.h | 6 +- .../amd/display/dc/dcn32/dcn32_resource_helpers.c | 112 +++++++++++---------- 3 files changed, 69 insertions(+), 71 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index 92d87745d933..631876832dfa 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -2984,7 +2984,7 @@ int dcn32_populate_dml_pipes_from_context( int i, pipe_cnt; struct resource_context *res_ctx = &context->res_ctx; struct pipe_ctx *pipe; - bool subvp_in_use = false; + bool subvp_in_use = false, is_pipe_split_expected[MAX_PIPES]; dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); @@ -3046,6 +3046,9 @@ int dcn32_populate_dml_pipes_from_context( if (dc->debug.enable_single_display_2to1_odm_policy) 
pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_2to1; } + + is_pipe_split_expected[i] = dcn32_predict_pipe_split(context, pipes[i].pipe, i); + pipe_cnt++; } @@ -3053,8 +3056,7 @@ int dcn32_populate_dml_pipes_from_context( * the DET available for each pipe). Use the DET override input to maintain our driver * policy. */ - switch (pipe_cnt) { - case 1: + if (pipe_cnt == 1 && !is_pipe_split_expected[0]) { pipes[0].pipe.src.det_size_override = DCN3_2_MAX_DET_SIZE; if (pipe->plane_state && !dc->debug.disable_z9_mpc) { if (!is_dual_plane(pipe->plane_state->format)) { @@ -3065,18 +3067,8 @@ int dcn32_populate_dml_pipes_from_context( pipes[0].pipe.src.det_size_override = 320; // 5K or higher } } - break; - case 2: - case 3: - case 4: - // For 2 and 3 pipes, use (MAX_DET_SIZE / pipe_cnt), for 4 pipes use default size for each pipe - for (i = 0; i < pipe_cnt; i++) { - pipes[i].pipe.src.det_size_override = (pipe_cnt < 4) ? (DCN3_2_MAX_DET_SIZE / pipe_cnt) : DCN3_2_DEFAULT_DET_SIZE; - } - break; - } - - dcn32_update_det_override_for_mpo(dc, context, pipes); + } else + dcn32_determine_det_override(context, pipes, is_pipe_split_expected, pipe_cnt); // In general cases we want to keep the dram clock change requirement // (prefer configs that support MCLK switch). 
Only override to false diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h index db4546317cb5..10254ab7e9d9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h @@ -100,7 +100,9 @@ bool dcn32_all_pipes_have_stream_and_plane(struct dc *dc, bool dcn32_subvp_in_use(struct dc *dc, struct dc_state *context); -void dcn32_update_det_override_for_mpo(struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes); +bool dcn32_predict_pipe_split(struct dc_state *context, display_pipe_params_st pipe, int index); + +void dcn32_determine_det_override(struct dc_state *context, display_e2e_pipe_params_st *pipes, + bool *is_pipe_split_expected, int pipe_cnt); #endif /* _DCN32_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index e001f6d1f6c3..a6ef1dba01fe 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -26,6 +26,8 @@ // header file of functions being implemented #include "dcn32_resource.h" #include "dcn20/dcn20_resource.h" +#include "dml/dcn32/display_mode_vba_util_32.h" + /** * ******************************************************************************************** * dcn32_helper_populate_phantom_dlg_params: Get DLG params for phantom pipes and populate pipe_ctx @@ -195,66 +197,68 @@ bool dcn32_subvp_in_use(struct dc *dc, return false; } -/* For MPO we adjust the DET allocation to ensure we have enough DET buffer when an MPO pipe - * is removed. For example for 1 MPO + 1 non-MPO normally we would allocate 6 DET segments - * for each pipe [6, 6, 6]. But when transitioning out of MPO it would change from - * [6, 6, 6] -> [9, 9]. 
However, if VUPDATE for the non-MPO pipe comes first we would be - * trying to allocate more DET than what's currently available which would result in underflow. - * - * In this case we must ensure there is enough buffer when transitioning in and out of MPO: - * - * 1 MPO (2 plane) + 1 non-MPO case: - * [4, 4, 9]<->[9, 9]: Allocate 4 each for MPO pipes, and maintain 9 for non-MPO pipe - * - * 1 MPO (2 plane) + 2 non-MPO case: - * [3, 3, 5, 5]<->[6, 6, 6] - * - * 1 MPO (3 plane) + 1 non-MPO case: - * [3, 3, 3, 9]<->[4, 4, 9] or [3, 3, 3, 6]<->[9, 9] - * - * For multi-display MPO case all pipes will have 4 segments: - * Removing MPO on one of the displays will result in 3 pipes - * (1 MPO and 1 non-MPO which is covered by single MPO stream case). - */ -void dcn32_update_det_override_for_mpo(struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes) +bool dcn32_predict_pipe_split(struct dc_state *context, display_pipe_params_st pipe, int index) { - uint8_t i, mpo_stream_index, pipe_cnt; - uint8_t mpo_stream_count = 0; - uint8_t mpo_planes = 0; // Only used in single display MPO case - unsigned int j; - struct resource_context *res_ctx = &context->res_ctx; + double pscl_throughput, pscl_throughput_chroma, dpp_clk_single_dpp, clock, + clk_frequency = 0.0, vco_speed = context->bw_ctx.dml.soc.dispclk_dppclk_vco_speed_mhz; - for (i = 0; i < context->stream_count; i++) { - if (context->stream_status[i].plane_count > 1) { - mpo_stream_index = i; - mpo_stream_count++; - mpo_planes = context->stream_status[i].plane_count; - } - } + dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(pipe.scale_ratio_depth.hscl_ratio, + pipe.scale_ratio_depth.hscl_ratio_c, + pipe.scale_ratio_depth.vscl_ratio, + pipe.scale_ratio_depth.vscl_ratio_c, + context->bw_ctx.dml.ip.max_dchub_pscl_bw_pix_per_clk, + context->bw_ctx.dml.ip.max_pscl_lb_bw_pix_per_clk, + pipe.dest.pixel_rate_mhz, + pipe.src.source_format, + pipe.scale_taps.htaps, + pipe.scale_taps.htaps_c, + 
pipe.scale_taps.vtaps, + pipe.scale_taps.vtaps_c, - if (mpo_stream_count == 1) { - for (j = 0, pipe_cnt = 0; j < dc->res_pool->pipe_count; j++) { - if (!res_ctx->pipe_ctx[j].stream) - continue; + /* Output */ + &pscl_throughput, &pscl_throughput_chroma, + &dpp_clk_single_dpp); - if (context->res_ctx.pipe_ctx[j].stream == context->streams[mpo_stream_index]) { - // For 3 plane MPO + 1 non-MPO, do [3, 3, 3, 9] - // For 2 plane MPO + 1 non-MPO, do [4, 4, 9] - if (context->stream_count - mpo_stream_count == 1) - pipes[pipe_cnt].pipe.src.det_size_override = DCN3_2_DET_SEG_SIZE * (mpo_planes == 2 ? 4 : 3); - else if (context->stream_count - mpo_stream_count == 2) - pipes[pipe_cnt].pipe.src.det_size_override = DCN3_2_DET_SEG_SIZE * 3; + clock = dpp_clk_single_dpp * (1 + context->bw_ctx.dml.soc.dcn_downspread_percent / 100); + + if (clock > 0) + clk_frequency = vco_speed * 4.0 / ((int) (vco_speed * 4.0)); + + if (clk_frequency > context->bw_ctx.dml.soc.clock_limits[index].dppclk_mhz) + return true; + else + return false; +} + +void dcn32_determine_det_override(struct dc_state *context, display_e2e_pipe_params_st *pipes, + bool *is_pipe_split_expected, int pipe_cnt) +{ + int i, j, count, stream_segments, pipe_segments[MAX_PIPES]; + + if (context->stream_count > 0) { + stream_segments = 18 / context->stream_count; + for (i = 0, count = 0; i < context->stream_count; i++) { + for (j = 0; j < pipe_cnt; j++) { + if (context->res_ctx.pipe_ctx[j].stream == context->streams[i]) { + count++; + if (is_pipe_split_expected[j]) + count++; + } + } + pipe_segments[i] = stream_segments / count; + } - } else if (context->res_ctx.pipe_ctx[j].stream && - context->res_ctx.pipe_ctx[j].stream != context->streams[mpo_stream_index]) { - // Update for non-MPO pipes - if (context->stream_count - mpo_stream_count == 1) - pipes[pipe_cnt].pipe.src.det_size_override = DCN3_2_DET_SEG_SIZE * 9; - else if (context->stream_count - mpo_stream_count == 2) - pipes[pipe_cnt].pipe.src.det_size_override = 
DCN3_2_DET_SEG_SIZE * 5; + for (i = 0; i < pipe_cnt; i++) { + pipes[i].pipe.src.det_size_override = 0; + for (j = 0; j < context->stream_count; j++) { + if (context->res_ctx.pipe_ctx[i].stream == context->streams[j]) { + pipes[i].pipe.src.det_size_override = pipe_segments[j] * DCN3_2_DET_SEG_SIZE; + break; + } } - pipe_cnt++; } + } else { + for (i = 0; i < pipe_cnt; i++) + pipes[i].pipe.src.det_size_override = 4 * DCN3_2_DET_SEG_SIZE; //DCN3_2_DEFAULT_DET_SIZE } } -- cgit v1.2.3 From 557f9100c74cfa033488563240f567466613cefa Mon Sep 17 00:00:00 2001 From: Maíra Canal Date: Thu, 14 Jul 2022 13:44:58 -0300 Subject: drm/amd/display: Remove unused clk_src variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the variable clk_src from the function dcn3_get_pix_clk_dividers. This was pointed by clang with the following warning: drivers/gpu/drm/amd/amdgpu/../display/dc/dce/dce_clock_source.c:1279:25: warning: variable 'clk_src' set but not used [-Wunused-but-set-variable] struct dce110_clk_src *clk_src; ^ 1 warning generated. Reviewed-by: André Almeida Signed-off-by: Maíra Canal Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c index 5cc7cc0b2f2d..d55da1ab1ac2 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c @@ -1276,9 +1276,7 @@ static uint32_t dcn3_get_pix_clk_dividers( struct pll_settings *pll_settings) { unsigned long long actual_pix_clk_100Hz = pix_clk_params ? 
pix_clk_params->requested_pix_clk_100hz : 0; - struct dce110_clk_src *clk_src; - clk_src = TO_DCE110_CLK_SRC(cs); DC_LOGGER_INIT(); if (pix_clk_params == NULL || pll_settings == NULL -- cgit v1.2.3 From d3e19f76121178420c3efc9688adfd5a448996e0 Mon Sep 17 00:00:00 2001 From: Maíra Canal Date: Thu, 14 Jul 2022 13:44:59 -0300 Subject: drm/amd/display: Remove unused dml32_CalculatedoublePipeDPPCLKAndSCLThroughput function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove dml32_CalculatedoublePipeDPPCLKAndSCLThroughput function, which is not used in the codebase. This was pointed by clang with the following warning: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_util_32.c:393:6: warning: no previous prototype for function 'dml32_CalculatedoublePipeDPPCLKAndSCLThroughput' [-Wmissing-prototypes] void dml32_CalculatedoublePipeDPPCLKAndSCLThroughput( ^ drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_util_32.c:393:1: note: declare 'static' if the function is not intended to be used outside of this translation unit void dml32_CalculatedoublePipeDPPCLKAndSCLThroughput( ^ static 1 warning generated. 
Reviewed-by: André Almeida Signed-off-by: Maíra Canal Signed-off-by: Alex Deucher --- .../dc/dml/dcn32/display_mode_vba_util_32.c | 54 ---------------------- 1 file changed, 54 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index febaff7d7343..67cbc7923652 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -390,60 +390,6 @@ void dml32_CalculateBytePerPixelAndBlockSizes( #endif } // CalculateBytePerPixelAndBlockSizes -void dml32_CalculatedoublePipeDPPCLKAndSCLThroughput( - double HRatio, - double HRatioChroma, - double VRatio, - double VRatioChroma, - double MaxDCHUBToPSCLThroughput, - double MaxPSCLToLBThroughput, - double PixelClock, - enum source_format_class SourcePixelFormat, - unsigned int HTaps, - unsigned int HTapsChroma, - unsigned int VTaps, - unsigned int VTapsChroma, - - /* output */ - double *PSCL_THROUGHPUT, - double *PSCL_THROUGHPUT_CHROMA, - double *DPPCLKUsingdoubleDPP) -{ - double DPPCLKUsingdoubleDPPLuma; - double DPPCLKUsingdoubleDPPChroma; - - if (HRatio > 1) { - *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio / - dml_ceil((double) HTaps / 6.0, 1.0)); - } else { - *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput); - } - - DPPCLKUsingdoubleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio / - *PSCL_THROUGHPUT, 1); - - if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingdoubleDPPLuma < 2 * PixelClock) - DPPCLKUsingdoubleDPPLuma = 2 * PixelClock; - - if ((SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 && SourcePixelFormat != dm_420_12 && - SourcePixelFormat != dm_rgbe_alpha)) { - *PSCL_THROUGHPUT_CHROMA = 0; - *DPPCLKUsingdoubleDPP = DPPCLKUsingdoubleDPPLuma; - } else { - if (HRatioChroma > 1) { - *PSCL_THROUGHPUT_CHROMA = 
dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * - HRatioChroma / dml_ceil((double) HTapsChroma / 6.0, 1.0)); - } else { - *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput); - } - DPPCLKUsingdoubleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma), - HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1); - if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingdoubleDPPChroma < 2 * PixelClock) - DPPCLKUsingdoubleDPPChroma = 2 * PixelClock; - *DPPCLKUsingdoubleDPP = dml_max(DPPCLKUsingdoubleDPPLuma, DPPCLKUsingdoubleDPPChroma); - } -} - void dml32_CalculateSwathAndDETConfiguration( unsigned int DETSizeOverride[], enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], -- cgit v1.2.3 From 4f5a17b221cc48d66cce13fe1531ca8fc99078b9 Mon Sep 17 00:00:00 2001 From: Maíra Canal Date: Thu, 14 Jul 2022 13:45:00 -0300 Subject: drm/amd/display: Remove unused NumberOfStates variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the unused unsigned int NumberOfStates from the file, which was declared but never hooked up. This was pointed by clang with the following warning: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c:33:27: warning: unused variable 'NumberOfStates' [-Wunused-const-variable] static const unsigned int NumberOfStates = DC__VOLTAGE_STATES; ^ 1 warning generated. 
Reviewed-by: André Almeida Signed-off-by: Maíra Canal Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index 9944f58f0db9..1efce9f5eae3 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -30,8 +30,6 @@ #include "../dml_inline_defs.h" #include "display_mode_vba_util_32.h" -static const unsigned int NumberOfStates = DC__VOLTAGE_STATES; - void dml32_recalculate(struct display_mode_lib *mode_lib); static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation( struct display_mode_lib *mode_lib); -- cgit v1.2.3 From 1cccdfe21eb6b8dd4371378b4b61223eb37f1ef7 Mon Sep 17 00:00:00 2001 From: Maíra Canal Date: Thu, 14 Jul 2022 13:45:01 -0300 Subject: drm/amd/display: Remove unused variables from dml_rq_dlg_get_dlg_params MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the variables dispclk_delay_subtotal and dppclk_delay_subtotal from the function dml_rq_dlg_get_dlg_params. This was pointed by clang with the following warning: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn31/display_rq_dlg_calc_31.c:920:15: warning: variable 'dispclk_delay_subtotal' set but not used [-Wunused-but-set-variable] unsigned int dispclk_delay_subtotal; ^ drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn31/display_rq_dlg_calc_31.c:919:15: warning: variable 'dppclk_delay_subtotal' set but not used [-Wunused-but-set-variable] unsigned int dppclk_delay_subtotal; ^ 2 warnings generated. 
Reviewed-by: André Almeida Signed-off-by: Maíra Canal Signed-off-by: Alex Deucher --- .../amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c index c94cf6e01e25..66b82e4f05c6 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c @@ -866,7 +866,6 @@ static void dml_rq_dlg_get_dlg_params( { const display_pipe_source_params_st *src = &e2e_pipe_param[pipe_idx].pipe.src; const display_pipe_dest_params_st *dst = &e2e_pipe_param[pipe_idx].pipe.dest; - const display_output_params_st *dout = &e2e_pipe_param[pipe_idx].dout; const display_clocks_and_cfg_st *clks = &e2e_pipe_param[pipe_idx].clks_cfg; const scaler_ratio_depth_st *scl = &e2e_pipe_param[pipe_idx].pipe.scale_ratio_depth; const scaler_taps_st *taps = &e2e_pipe_param[pipe_idx].pipe.scale_taps; @@ -916,9 +915,6 @@ static void dml_rq_dlg_get_dlg_params( unsigned int vupdate_width; unsigned int vready_offset; - unsigned int dppclk_delay_subtotal; - unsigned int dispclk_delay_subtotal; - unsigned int vstartup_start; unsigned int dst_x_after_scaler; unsigned int dst_y_after_scaler; @@ -1037,21 +1033,6 @@ static void dml_rq_dlg_get_dlg_params( vupdate_width = dst->vupdate_width; vready_offset = dst->vready_offset; - dppclk_delay_subtotal = mode_lib->ip.dppclk_delay_subtotal; - dispclk_delay_subtotal = mode_lib->ip.dispclk_delay_subtotal; - - if (scl_enable) - dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl; - else - dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl_lb_only; - - dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_cnvc_formatter + src->num_cursors * mode_lib->ip.dppclk_delay_cnvc_cursor; - - if (dout->dsc_enable) { - double dsc_delay = get_dsc_delay(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // 
FROM VBA - dispclk_delay_subtotal += dsc_delay; - } - vstartup_start = dst->vstartup_start; if (interlaced) { if (vstartup_start / 2.0 - (double) (vready_offset + vupdate_width + vupdate_offset) / htotal <= vblank_end / 2.0) -- cgit v1.2.3 From 54c3e9493cd502d63ff3643fa70b5f98b3201846 Mon Sep 17 00:00:00 2001 From: Maíra Canal Date: Thu, 14 Jul 2022 13:45:03 -0300 Subject: drm/amd/display: Remove unused variables from dcn10_stream_encoder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The variable regval from the function enc1_update_generic_info_packet and the variables dynamic_range_rgb and dynamic_range_ycbcr from the function enc1_stream_encoder_dp_set_stream_attribute are not currently used. This was pointed by clang with the following warnings: drivers/gpu/drm/amd/amdgpu/../display/dc/dcn10/dcn10_stream_encoder.c:62:11: warning: variable 'regval' set but not used [-Wunused-but-set-variable] uint32_t regval; ^ drivers/gpu/drm/amd/amdgpu/../display/dc/dcn10/dcn10_stream_encoder.c:262:10: warning: variable 'dynamic_range_rgb' set but not used [-Wunused-but-set-variable] uint8_t dynamic_range_rgb = 0; /*full range*/ ^ drivers/gpu/drm/amd/amdgpu/../display/dc/dcn10/dcn10_stream_encoder.c:263:10: warning: variable 'dynamic_range_ycbcr' set but not used [-Wunused-but-set-variable] uint8_t dynamic_range_ycbcr = 1; /*bt709*/ ^ 3 warnings generated. 
Reviewed-by: André Almeida Signed-off-by: Maíra Canal Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c index c99c6fababa9..484e7cdf00b8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c @@ -59,7 +59,6 @@ void enc1_update_generic_info_packet( uint32_t packet_index, const struct dc_info_packet *info_packet) { - uint32_t regval; /* TODOFPGA Figure out a proper number for max_retries polling for lock * use 50 for now. */ @@ -88,7 +87,6 @@ void enc1_update_generic_info_packet( REG_UPDATE(AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_CONFLICT_CLR, 1); /* choose which generic packet to use */ - regval = REG_READ(AFMT_VBI_PACKET_CONTROL); REG_UPDATE(AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_INDEX, packet_index); @@ -259,8 +257,6 @@ void enc1_stream_encoder_dp_set_stream_attribute( uint32_t h_back_porch; uint8_t synchronous_clock = 0; /* asynchronous mode */ uint8_t colorimetry_bpc; - uint8_t dynamic_range_rgb = 0; /*full range*/ - uint8_t dynamic_range_ycbcr = 1; /*bt709*/ uint8_t dp_pixel_encoding = 0; uint8_t dp_component_depth = 0; @@ -372,18 +368,15 @@ void enc1_stream_encoder_dp_set_stream_attribute( switch (output_color_space) { case COLOR_SPACE_SRGB: misc1 = misc1 & ~0x80; /* bit7 = 0*/ - dynamic_range_rgb = 0; /*full range*/ break; case COLOR_SPACE_SRGB_LIMITED: misc0 = misc0 | 0x8; /* bit3=1 */ misc1 = misc1 & ~0x80; /* bit7 = 0*/ - dynamic_range_rgb = 1; /*limited range*/ break; case COLOR_SPACE_YCBCR601: case COLOR_SPACE_YCBCR601_LIMITED: misc0 = misc0 | 0x8; /* bit3=1, bit4=0 */ misc1 = misc1 & ~0x80; /* bit7 = 0*/ - dynamic_range_ycbcr = 0; /*bt601*/ if (hw_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR422) misc0 = misc0 | 0x2; /* bit2=0, bit1=1 */ else if 
(hw_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR444) @@ -393,15 +386,12 @@ void enc1_stream_encoder_dp_set_stream_attribute( case COLOR_SPACE_YCBCR709_LIMITED: misc0 = misc0 | 0x18; /* bit3=1, bit4=1 */ misc1 = misc1 & ~0x80; /* bit7 = 0*/ - dynamic_range_ycbcr = 1; /*bt709*/ if (hw_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR422) misc0 = misc0 | 0x2; /* bit2=0, bit1=1 */ else if (hw_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR444) misc0 = misc0 | 0x4; /* bit2=1, bit1=0 */ break; case COLOR_SPACE_2020_RGB_LIMITEDRANGE: - dynamic_range_rgb = 1; /*limited range*/ - break; case COLOR_SPACE_2020_RGB_FULLRANGE: case COLOR_SPACE_2020_YCBCR: case COLOR_SPACE_XR_RGB: -- cgit v1.2.3 From 869618c3440227f848ff9ac55aa64d523a60476e Mon Sep 17 00:00:00 2001 From: Maíra Canal Date: Thu, 14 Jul 2022 13:45:04 -0300 Subject: drm/amd/display: Remove unused MaxUsedBW variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the variable MaxUsedBW from the function DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation. As a side-effect, the variables MaxPerPlaneVActiveWRBandwidth and WRBandwidth are also removed. This was pointed by clang with the following warning: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn30/display_mode_vba_30.c:3043:10: warning: variable 'MaxUsedBW' set but not used [-Wunused-but-set-variable] double MaxUsedBW = 0; ^ 1 warning generated. 
Reviewed-by: André Almeida Signed-off-by: Maíra Canal Signed-off-by: Alex Deucher --- .../amd/display/dc/dml/dcn30/display_mode_vba_30.c | 28 ---------------------- 1 file changed, 28 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c index 842eb94ebe04..876b321b30ca 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c @@ -3037,40 +3037,12 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman { //Maximum Bandwidth Used - double TotalWRBandwidth = 0; - double MaxPerPlaneVActiveWRBandwidth = 0; - double WRBandwidth = 0; - double MaxUsedBW = 0; - for (k = 0; k < v->NumberOfActivePlanes; ++k) { - if (v->WritebackEnable[k] == true - && v->WritebackPixelFormat[k] == dm_444_32) { - WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] - / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4; - } else if (v->WritebackEnable[k] == true) { - WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] - / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8; - } - TotalWRBandwidth = TotalWRBandwidth + WRBandwidth; - MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth); - } - v->TotalDataReadBandwidth = 0; for (k = 0; k < v->NumberOfActivePlanes; ++k) { v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]; } - - { - double MaxPerPlaneVActiveRDBandwidth = 0; - for (k = 0; k < v->NumberOfActivePlanes; ++k) { - MaxPerPlaneVActiveRDBandwidth = dml_max(MaxPerPlaneVActiveRDBandwidth, - v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]); - - } - } - - MaxUsedBW = MaxTotalRDBandwidth + TotalWRBandwidth; } // VStartup Margin -- cgit v1.2.3 From 
fbcc38811fcb47335899a3776b4fd5670db4e228 Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Sat, 16 Jul 2022 18:51:44 -0100 Subject: drm/amd/display: move dcn31_update_soc_for_wm_a func to dml fpu folder Although dcn31_update_soc_for_wm_a() is only called in dml/dcn31/dcn31_fpu by dc->res_pool->funcs->update_soc_for_wm_a(dc, context), it's declared in dcn31_resource that is not FPU protected. Move this function to dcn31_fpu file as part of the work to isolate FPU code. Signed-off-by: Melissa Wen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c | 9 --------- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h | 1 - drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c | 9 +++++++++ drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h | 2 ++ 5 files changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 5e924d0389cc..178d40c0d70a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -1716,15 +1716,6 @@ int dcn31_populate_dml_pipes_from_context( return pipe_cnt; } -void dcn31_update_soc_for_wm_a(struct dc *dc, struct dc_state *context) -{ - if (dc->clk_mgr->bw_params->wm_table.entries[WM_A].valid) { - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].pstate_latency_us; - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].sr_enter_plus_exit_time_us; - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].sr_exit_time_us; - } -} - void dcn31_calculate_wm_and_dlg( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h 
b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h index 393458015d6a..41f8ec99da6b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h @@ -59,7 +59,6 @@ dcn31_set_mcif_arb_params(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, int pipe_cnt); -void dcn31_update_soc_for_wm_a(struct dc *dc, struct dc_state *context); struct resource_pool *dcn31_create_resource_pool( const struct dc_init_data *init_data, diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c index ba371769dc3e..450ebd838505 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c @@ -69,6 +69,7 @@ #include "virtual/virtual_stream_encoder.h" #include "dce110/dce110_resource.h" #include "dml/display_mode_vba.h" +#include "dml/dcn31/dcn31_fpu.h" #include "dcn314/dcn314_dccg.h" #include "dcn10/dcn10_resource.h" #include "dcn31/dcn31_panel_cntl.h" diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c index 7be3476989ce..facac3daeaca 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c @@ -435,6 +435,15 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_16_soc = { .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, }; +void dcn31_update_soc_for_wm_a(struct dc *dc, struct dc_state *context) +{ + if (dc->clk_mgr->bw_params->wm_table.entries[WM_A].valid) { + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].pstate_latency_us; + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].sr_enter_plus_exit_time_us; + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].sr_exit_time_us; + } +} + void 
dcn31_calculate_wm_and_dlg_fp( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h index 24ac19c83687..0a10de80c1a4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h @@ -31,6 +31,8 @@ #define DCN3_15_MIN_COMPBUF_SIZE_KB 128 #define DCN3_16_DEFAULT_DET_SIZE 192 +void dcn31_update_soc_for_wm_a(struct dc *dc, struct dc_state *context); + void dcn31_calculate_wm_and_dlg_fp( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, -- cgit v1.2.3 From 4686177f7d2140cdd9d031702c2b53ac4c89340a Mon Sep 17 00:00:00 2001 From: André Almeida Date: Thu, 14 Jul 2022 16:17:44 -0300 Subject: drm/amd/debugfs: Expose GFXOFF state to userspace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GFXOFF has two different "state" values: one to define if the GPU is allowed/disallowed to enter GFXOFF, usually called state; and another one to define if currently GFXOFF is being used, usually called status. Even when GFXOFF is allowed, GPU firmware can decide not to use it, according to the GPU load. Userspace can allow/disallow GPUs to enter into GFXOFF via debugfs. The kernel maintains a counter of requests for GFXOFF (gfx_off_req_count) that should be decreased to allow GFXOFF and increased to disallow. The issue with this interface is that userspace can't be sure if GFXOFF is currently allowed. Even by checking the amdgpu_gfxoff file, one might get an ambiguous 2, which means that the GPU is currently out of GFXOFF, but that can be either because it's currently disallowed or because it's allowed but given the current GPU load it's enabled. Then, userspace needs to rely on the fact that GFXOFF is enabled by default on boot and to track this information.
To make userspace life easier and GFXOFF more reliable, return the current state of GFXOFF to userspace when reading amdgpu_gfxoff with the same semantics of writing: 0 means not allowed, not 0 means allowed. Expose the current status of GFXOFF through a new file, amdgpu_gfxoff_status. Signed-off-by: André Almeida Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 49 +++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index f3b3c688e4e7..e2eec985adb3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1117,13 +1117,50 @@ static ssize_t amdgpu_debugfs_gfxoff_read(struct file *f, char __user *buf, } while (size) { - uint32_t value; + u32 value = adev->gfx.gfx_off_state; + + r = put_user(value, (u32 *)buf); + if (r) + goto out; + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + r = result; +out: + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + + return r; +} + +static ssize_t amdgpu_debugfs_gfxoff_status_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = file_inode(f)->i_private; + ssize_t result = 0; + int r; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); + if (r < 0) { + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + return r; + } + + while (size) { + u32 value; r = amdgpu_get_gfx_off_status(adev, &value); if (r) goto out; - r = put_user(value, (uint32_t *)buf); + r = put_user(value, (u32 *)buf); if (r) goto out; @@ -1206,6 +1243,12 @@ static const struct file_operations amdgpu_debugfs_gfxoff_fops = { .llseek = default_llseek }; +static const struct file_operations amdgpu_debugfs_gfxoff_status_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_gfxoff_status_read, 
+ .llseek = default_llseek +}; + static const struct file_operations *debugfs_regs[] = { &amdgpu_debugfs_regs_fops, &amdgpu_debugfs_regs2_fops, @@ -1217,6 +1260,7 @@ static const struct file_operations *debugfs_regs[] = { &amdgpu_debugfs_wave_fops, &amdgpu_debugfs_gpr_fops, &amdgpu_debugfs_gfxoff_fops, + &amdgpu_debugfs_gfxoff_status_fops, }; static const char *debugfs_regs_names[] = { @@ -1230,6 +1274,7 @@ static const char *debugfs_regs_names[] = { "amdgpu_wave", "amdgpu_gpr", "amdgpu_gfxoff", + "amdgpu_gfxoff_status", }; /** -- cgit v1.2.3 From 7a06e125872929247f78f363d1dc2dbd528631ab Mon Sep 17 00:00:00 2001 From: André Almeida Date: Thu, 14 Jul 2022 16:17:45 -0300 Subject: Documentation/gpu: Add GFXOFF section MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a GFXOFF section at "GPU Power Controls" file, explaining what it is and how userspace can interact with it. v2: minor tweaks to the documentation (Alex) Signed-off-by: André Almeida Signed-off-by: Alex Deucher --- Documentation/gpu/amdgpu/thermal.rst | 41 ++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/Documentation/gpu/amdgpu/thermal.rst b/Documentation/gpu/amdgpu/thermal.rst index 8aeb0186c9ef..997231b6adcf 100644 --- a/Documentation/gpu/amdgpu/thermal.rst +++ b/Documentation/gpu/amdgpu/thermal.rst @@ -63,3 +63,44 @@ gpu_metrics .. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c :doc: gpu_metrics + +GFXOFF +====== + +GFXOFF is a feature found in most recent GPUs that saves power at runtime. The +card's RLC (RunList Controller) firmware powers off the gfx engine +dynamically when there is no workload on gfx or compute pipes. GFXOFF is on by +default on supported GPUs.
+ +Userspace can interact with GFXOFF through a debugfs interface: + +``amdgpu_gfxoff`` +----------------- + +Use it to enable/disable GFXOFF, and to check if it's currently enabled/disabled:: + + $ xxd -l1 -p /sys/kernel/debug/dri/0/amdgpu_gfxoff + 01 + +- Write 0 to disable it, and 1 to enable it. +- Read 0 means it's disabled, 1 it's enabled. + +If it's enabled, that means that the GPU is free to enter into GFXOFF mode as +needed. Disabled means that it will never enter GFXOFF mode. + +``amdgpu_gfxoff_status`` +------------------------ + +Read it to check current GFXOFF's status of a GPU:: + + $ xxd -l1 -p /sys/kernel/debug/dri/0/amdgpu_gfxoff_status + 02 + +- 0: GPU is in GFXOFF state, the gfx engine is powered down. +- 1: Transition out of GFXOFF state +- 2: Not in GFXOFF state +- 3: Transition into GFXOFF state + +If GFXOFF is enabled, the value will be transitioning around [0, 3], always +getting into 0 when possible. When it's disabled, it's always at 2. Returns +``-EINVAL`` if it's not supported. -- cgit v1.2.3 From 7b5b0d196c2e6cad87cb1e3ce285c7885f2bd796 Mon Sep 17 00:00:00 2001 From: Vladimir Stempen Date: Wed, 6 Jul 2022 15:57:12 -0400 Subject: drm/amd/display: Disable GPUVM in IP resource configuration [Why] VM enabled in IP configuration causes UCLK not reaching DPM0. The expectation for VM enable should be that KMD will indicate to DAL when VM is enabled, then DAL will set the bit accordingly [How] Set gpuvm_enable to zero in DCN3_20 and DCN3_21 resource.
Tested-by: Daniel Wheeler Reviewed-by: Martin Leung Acked-by: Alan Liu Signed-off-by: Vladimir Stempen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 2 +- drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index 631876832dfa..0cb44ea9753b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -120,7 +120,7 @@ static const struct IP_BASE DCN_BASE = { { { { 0x00000012, 0x000000C0, 0x000034C #define DCN3_2_MIN_COMPBUF_SIZE_KB 128 struct _vcs_dpi_ip_params_st dcn3_2_ip = { - .gpuvm_enable = 1, + .gpuvm_enable = 0, .gpuvm_max_page_table_levels = 4, .hostvm_enable = 0, .rob_buffer_size_kbytes = 128, diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c index ebbeebf972dc..d218c6dd71aa 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c @@ -123,7 +123,7 @@ static const struct IP_BASE DCN_BASE = { { { { 0x00000012, 0x000000C0, 0x000034C #define DCN3_2_DEFAULT_DET_SIZE 256 struct _vcs_dpi_ip_params_st dcn3_21_ip = { - .gpuvm_enable = 1, + .gpuvm_enable = 0, .gpuvm_max_page_table_levels = 4, .hostvm_enable = 0, .rob_buffer_size_kbytes = 128, -- cgit v1.2.3 From 660f46e16c4b8a34978012a9f10a32a16db3e98f Mon Sep 17 00:00:00 2001 From: Taimur Hassan Date: Fri, 8 Jul 2022 13:32:46 -0400 Subject: drm/amd/display: Loop through all pipes for DET allocation [Why & How] There are cases where the pipes populated are not all at the top of the pipes list under context. Loop through all pipes for DET allocation instead of just the number of populated ones, even if some unpopulated pipes are iterated through unnecessarily. 
Tested-by: Daniel Wheeler Reviewed-by: Alvin Lee Acked-by: Alan Liu Signed-off-by: Taimur Hassan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index 0cb44ea9753b..32da47e24839 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -3068,7 +3068,7 @@ int dcn32_populate_dml_pipes_from_context( } } } else - dcn32_determine_det_override(context, pipes, is_pipe_split_expected, pipe_cnt); + dcn32_determine_det_override(context, pipes, is_pipe_split_expected, dc->res_pool->pipe_count); // In general cases we want to keep the dram clock change requirement // (prefer configs that support MCLK switch). Only override to false -- cgit v1.2.3 From f3cd57e499e6904b7e356d11bd33d617341b3f24 Mon Sep 17 00:00:00 2001 From: Roman Li Date: Tue, 19 Jul 2022 11:55:07 -0400 Subject: drm/amd/display: Disable dmcu fw loading for dcn314 [Why] DCN 3.1.4 uses dmub not dmcu. 
Attempt to identify dmcu firmware for dcn314 results in dm init error: "Unsupported ASIC type" [How] Add dcn314 to the list of asics that don't require dmcu Signed-off-by: Roman Li Reviewed-by: Alex Deucher Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 4e51b06fcdd9..a9f9c929dca6 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1805,6 +1805,7 @@ static int load_dmcu_fw(struct amdgpu_device *adev) case IP_VERSION(3, 0, 1): case IP_VERSION(3, 1, 2): case IP_VERSION(3, 1, 3): + case IP_VERSION(3, 1, 4): case IP_VERSION(3, 1, 5): case IP_VERSION(3, 1, 6): case IP_VERSION(3, 2, 0): -- cgit v1.2.3 From 869b10ac8d2300327f554d83f4dbab041bf27d49 Mon Sep 17 00:00:00 2001 From: Roman Li Date: Mon, 13 Jun 2022 12:21:59 -0400 Subject: drm/amdgpu: add dm ip block for dcn 3.1.4 Adding dm ip block to enable display on dcn 3.1.4. 
Signed-off-by: Roman Li Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 0ba56e2ebf09..242d1847c4aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -1716,6 +1716,7 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(3, 0, 1): case IP_VERSION(3, 1, 2): case IP_VERSION(3, 1, 3): + case IP_VERSION(3, 1, 4): case IP_VERSION(3, 1, 5): case IP_VERSION(3, 1, 6): case IP_VERSION(3, 2, 0): -- cgit v1.2.3 From 4d37fd51b4440cf6a02942c0a169ee18a902fb5b Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Sat, 16 Jul 2022 11:57:32 +0800 Subject: drm/radeon: Fix comment typo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The double `have' is duplicated in line 696, remove one. Reviewed-by: Christian König Signed-off-by: Jason Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 84843b3b3aef..261fcbae88d7 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -693,7 +693,7 @@ int radeon_gem_va_ioctl(struct drm_device *dev, void *data, } /* !! DONT REMOVE !! 
- * We don't support vm_id yet, to be sure we don't have have broken + * We don't support vm_id yet, to be sure we don't have broken * userspace, reject anyone trying to use non 0 value thus moving * forward we can use those fields without breaking existant userspace */ -- cgit v1.2.3 From c19a23fadd279f433424b4d6436fe4ab0020e20c Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Sat, 16 Jul 2022 12:28:41 +0800 Subject: drm/amdgpu: Fix comment typo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The double `to' is duplicated in the comment, remove one. Reviewed-by: Christian König Signed-off-by: Jason Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 4c66aff11a40..52d1fd7d8e81 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -80,7 +80,7 @@ * - 3.24.0 - Add high priority compute support for gfx9 * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk). * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE. - * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation. + * - 3.27.0 - Add new chunk to AMDGPU_CS to enable BO_LIST creation. * - 3.28.0 - Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES * - 3.29.0 - Add AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID * - 3.30.0 - Add AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE. -- cgit v1.2.3 From 37edc99979b717312e60cf3463ab756f5a3d6de6 Mon Sep 17 00:00:00 2001 From: Chris Park Date: Fri, 8 Jul 2022 15:36:18 -0400 Subject: drm/amd/display: Update Cursor Attribute MALL cache [Why] Cursor size can update without MALL cache update. Update the register on cursor attribute as well. [How] Update cursor MALL cache on cursor attribute update. 
Tested-by: Daniel Wheeler Reviewed-by: Alvin Lee Acked-by: Alan Liu Signed-off-by: Chris Park Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c | 40 ++++++++++++++++++++++- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h | 3 ++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c index 0a7d64306481..3176b04a7740 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c @@ -94,6 +94,44 @@ void hubp32_phantom_hubp_post_enable(struct hubp *hubp) } } +void hubp32_cursor_set_attributes( + struct hubp *hubp, + const struct dc_cursor_attributes *attr) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + enum cursor_pitch hw_pitch = hubp1_get_cursor_pitch(attr->pitch); + enum cursor_lines_per_chunk lpc = hubp2_get_lines_per_chunk( + attr->width, attr->color_format); + + hubp->curs_attr = *attr; + + REG_UPDATE(CURSOR_SURFACE_ADDRESS_HIGH, + CURSOR_SURFACE_ADDRESS_HIGH, attr->address.high_part); + REG_UPDATE(CURSOR_SURFACE_ADDRESS, + CURSOR_SURFACE_ADDRESS, attr->address.low_part); + + REG_UPDATE_2(CURSOR_SIZE, + CURSOR_WIDTH, attr->width, + CURSOR_HEIGHT, attr->height); + + REG_UPDATE_4(CURSOR_CONTROL, + CURSOR_MODE, attr->color_format, + CURSOR_2X_MAGNIFY, attr->attribute_flags.bits.ENABLE_MAGNIFICATION, + CURSOR_PITCH, hw_pitch, + CURSOR_LINES_PER_CHUNK, lpc); + + REG_SET_2(CURSOR_SETTINGS, 0, + /* no shift of the cursor HDL schedule */ + CURSOR0_DST_Y_OFFSET, 0, + /* used to shift the cursor chunk request deadline */ + CURSOR0_CHUNK_HDL_ADJUST, 3); + + if (attr->width * attr->height * 4 > 16384) + REG_UPDATE(DCHUBP_MALL_CONFIG, USE_MALL_FOR_CURSOR, true); + else + REG_UPDATE(DCHUBP_MALL_CONFIG, USE_MALL_FOR_CURSOR, false); +} + static struct hubp_funcs dcn32_hubp_funcs = { .hubp_enable_tripleBuffer = hubp2_enable_triplebuffer, .hubp_is_triplebuffer_enabled = 
hubp2_is_triplebuffer_enabled, @@ -106,7 +144,7 @@ static struct hubp_funcs dcn32_hubp_funcs = { .set_blank = hubp2_set_blank, .dcc_control = hubp3_dcc_control, .mem_program_viewport = min_set_viewport, - .set_cursor_attributes = hubp2_cursor_set_attributes, + .set_cursor_attributes = hubp32_cursor_set_attributes, .set_cursor_position = hubp2_cursor_set_position, .hubp_clk_cntl = hubp2_clk_cntl, .hubp_vtg_sel = hubp2_vtg_sel, diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h index 00b4211389c2..c4315d50fbb0 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h @@ -58,6 +58,9 @@ void hubp32_prepare_subvp_buffering(struct hubp *hubp, bool enable); void hubp32_phantom_hubp_post_enable(struct hubp *hubp); +void hubp32_cursor_set_attributes(struct hubp *hubp, + const struct dc_cursor_attributes *attr); + bool hubp32_construct( struct dcn20_hubp *hubp2, struct dc_context *ctx, -- cgit v1.2.3 From f4b4e41a2e05270cd90c5817ab514ace95555874 Mon Sep 17 00:00:00 2001 From: Jun Lei Date: Fri, 24 Jun 2022 16:28:50 -0400 Subject: drm/amd/display: Update DML logic for unbounded req handling [why] Unbounded request logic in resource/DML has some issues where unbounded request is being enabled incorrectly. SW today enables unbounded request unconditionally in hardware, on the assumption that HW can always support it in single pipe scenarios. This worked until now because the same assumption is made in DML. A new DML update is needed to fix a bug, where there are single pipe scenarios where unbounded cannot be enabled, and this change in DML needs to be ported in, and dcn32 resource logic fixed. [how] First, dcn32_resource should program unbounded req in HW according to unbounded req enablement output from DML, as opposed to DML input. 
Second, port in DML update which disables unbounded req in some scenarios to fix an issue with poor stutter performance Tested-by: Daniel Wheeler Reviewed-by: Rodrigo Siqueira Signed-off-by: Jun Lei Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 11 ++++- .../amd/display/dc/dml/dcn32/display_mode_vba_32.c | 44 ++++++++++++++++--- .../dc/dml/dcn32/display_mode_vba_util_32.c | 51 +++++++++++++++++++--- .../dc/dml/dcn32/display_mode_vba_util_32.h | 10 ++++- .../gpu/drm/amd/display/dc/dml/display_mode_vba.c | 1 + 5 files changed, 103 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index 32da47e24839..39214a0dcdf2 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -3322,6 +3322,7 @@ void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context, display { int i, pipe_idx; bool usr_retraining_support = false; + bool unbounded_req_enabled = false; /* Writeback MCIF_WB arbitration parameters */ dc->res_pool->funcs->set_mcif_arb_params(dc, context, pipes, pipe_cnt); @@ -3357,6 +3358,14 @@ void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context, display if (context->bw_ctx.bw.dcn.clk.dispclk_khz < dc->debug.min_disp_clk_khz) context->bw_ctx.bw.dcn.clk.dispclk_khz = dc->debug.min_disp_clk_khz; + unbounded_req_enabled = get_unbounded_request_enabled(&context->bw_ctx.dml, pipes, pipe_cnt); + + if (unbounded_req_enabled && pipe_cnt > 1) { + // Unbounded requesting should not ever be used when more than 1 pipe is enabled. 
+ ASSERT(false); + unbounded_req_enabled = false; + } + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { if (!context->res_ctx.pipe_ctx[i].stream) continue; @@ -3375,7 +3384,7 @@ void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context, display } else { context->res_ctx.pipe_ctx[i].det_buffer_size_kb = get_det_buffer_size_kbytes(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); - context->res_ctx.pipe_ctx[i].unbounded_req = pipes[pipe_idx].pipe.src.unbounded_req_mode; + context->res_ctx.pipe_ctx[i].unbounded_req = unbounded_req_enabled; } if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) context->bw_ctx.bw.dcn.clk.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index 1efce9f5eae3..e9204c711cb9 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -224,6 +224,9 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman mode_lib->vba.NumberOfActiveSurfaces, mode_lib->vba.nomDETInKByte, mode_lib->vba.UseUnboundedRequesting, + mode_lib->vba.DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment, + mode_lib->vba.ip.pixel_chunk_size_kbytes, + mode_lib->vba.ip.rob_buffer_size_kbytes, mode_lib->vba.CompressedBufferSegmentSizeInkByteFinal, v->dummy_vars .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation @@ -285,6 +288,10 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman mode_lib->vba.DETBufferSizeC, &v->UnboundedRequestEnabled, &v->CompressedBufferSizeInkByte, + &v->CompBufReservedSpaceKBytes, + &v->dummy_vars + .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation + .dummy_boolean, /* bool *CompBufReservedSpaceNeedAjustment */ v->dummy_vars 
.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation .dummy_boolean_array, /* bool ViewportSizeSupportPerSurface[] */ @@ -293,6 +300,9 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman .dummy_boolean); /* bool *ViewportSizeSupport */ } + v->CompBufReservedSpaceZs = v->CompBufReservedSpaceKBytes * 1024.0 / 256.0; + v->CompBufReservedSpace64B = v->CompBufReservedSpaceKBytes * 1024.0 / 64.0; + // DCFCLK Deep Sleep dml32_CalculateDCFCLKDeepSleep( mode_lib->vba.NumberOfActiveSurfaces, @@ -1530,8 +1540,8 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->TotalDataReadBandwidth, mode_lib->vba.DCFCLK, mode_lib->vba.ReturnBW, - mode_lib->vba.CompbufReservedSpace64B, - mode_lib->vba.CompbufReservedSpaceZs, + v->CompbufReservedSpace64B, + v->CompbufReservedSpaceZs, mode_lib->vba.SRExitTime, mode_lib->vba.SRExitZ8Time, mode_lib->vba.SynchronizeTimingsFinal, @@ -1596,8 +1606,8 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->TotalDataReadBandwidth, mode_lib->vba.DCFCLK, mode_lib->vba.ReturnBW, - 0, //mode_lib->vba.CompbufReservedSpace64B, - 0, //mode_lib->vba.CompbufReservedSpaceZs, + 0, //CompbufReservedSpace64B, + 0, //CompbufReservedSpaceZs, mode_lib->vba.SRExitTime, mode_lib->vba.SRExitZ8Time, mode_lib->vba.SynchronizeTimingsFinal, @@ -1659,6 +1669,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) { unsigned int dummy_integer[4]; + bool dummy_boolean[2]; bool MPCCombineMethodAsNeededForPStateChangeAndVoltage; bool MPCCombineMethodAsPossible; enum odm_combine_mode dummy_odm_mode[DC__NUM_DPP__MAX]; @@ -1673,6 +1684,8 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l bool SubViewportMALLPStateMethod; bool PhantomPipeMALLPStateMethod; unsigned int MaximumMPCCombine; + bool 
CompBufReservedSpaceNeedAdjustment; + bool CompBufReservedSpaceNeedAdjustmentSingleDPP; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: called\n", __func__); @@ -1905,6 +1918,9 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.NumberOfActiveSurfaces, mode_lib->vba.nomDETInKByte, mode_lib->vba.UseUnboundedRequesting, + mode_lib->vba.DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment, + mode_lib->vba.ip.pixel_chunk_size_kbytes, + mode_lib->vba.ip.rob_buffer_size_kbytes, mode_lib->vba.CompressedBufferSegmentSizeInkByteFinal, mode_lib->vba.Output, mode_lib->vba.ReadBandwidthLuma, @@ -1952,6 +1968,8 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[7], /* Long DETBufferSizeC[] */ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean_array[0][0], /* bool *UnboundedRequestEnabled */ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[0][0], /* Long *CompressedBufferSizeInkByte */ + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[1][0], /* Long *CompBufReservedSpaceKBytes */ + &CompBufReservedSpaceNeedAdjustmentSingleDPP, mode_lib->vba.SingleDPPViewportSizeSupportPerSurface,/* bool ViewportSizeSupportPerSurface[] */ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean_array[1][0]); /* bool *ViewportSizeSupport */ @@ -2120,9 +2138,18 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } } + // if TotalNumberOfActiveDPP is > 1, then there should be no unbounded req mode (hw limitation), the comp buf reserved adjustment is not needed regardless + // if TotalNumberOfActiveDPP is == 1, then will use the SingleDPP version of unbounded_req for the decision + CompBufReservedSpaceNeedAdjustment = (mode_lib->vba.TotalNumberOfActiveDPP[i][j] > 1) ? 
0 : CompBufReservedSpaceNeedAdjustmentSingleDPP; + + + if (j == 1 && !dml32_UnboundedRequest(mode_lib->vba.UseUnboundedRequesting, - mode_lib->vba.TotalNumberOfActiveDPP[i][j], NoChroma, - mode_lib->vba.Output[0])) { + mode_lib->vba.TotalNumberOfActiveDPP[i][j], NoChroma, + mode_lib->vba.Output[0], + mode_lib->vba.SurfaceTiling[0], + CompBufReservedSpaceNeedAdjustment, + mode_lib->vba.DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)) { while (!(mode_lib->vba.TotalNumberOfActiveDPP[i][j] >= mode_lib->vba.MaxNumDPP || mode_lib->vba.TotalNumberOfSingleDPPSurfaces[i][j] == 0)) { double BWOfNonCombinedSurfaceOfMaximumBandwidth = 0; @@ -2500,6 +2527,9 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.NumberOfActiveSurfaces, mode_lib->vba.nomDETInKByte, mode_lib->vba.UseUnboundedRequesting, + mode_lib->vba.DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment, + mode_lib->vba.ip.pixel_chunk_size_kbytes, + mode_lib->vba.ip.rob_buffer_size_kbytes, mode_lib->vba.CompressedBufferSegmentSizeInkByteFinal, mode_lib->vba.Output, mode_lib->vba.ReadBandwidthLuma, @@ -2546,6 +2576,8 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.DETBufferSizeCThisState, &mode_lib->vba.UnboundedRequestEnabledThisState, &mode_lib->vba.CompressedBufferSizeInkByteThisState, + &dummy_integer[0], /* Long CompBufReservedSpaceKBytes */ + &dummy_boolean[0], /* bool CompBufReservedSpaceNeedAdjustment */ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean_array[0], &mode_lib->vba.ViewportSizeSupport[i][j]); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index 67cbc7923652..c8a3f367d622 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -400,6 +400,9 @@ void 
dml32_CalculateSwathAndDETConfiguration( unsigned int NumberOfActiveSurfaces, unsigned int nomDETInKByte, enum unbounded_requesting_policy UseUnboundedRequestingFinal, + bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment, + unsigned int PixelChunkSizeKBytes, + unsigned int ROBSizeKBytes, unsigned int CompressedBufferSegmentSizeInkByteFinal, enum output_encoder_class Output[], double ReadBandwidthLuma[], @@ -447,6 +450,8 @@ void dml32_CalculateSwathAndDETConfiguration( unsigned int DETBufferSizeC[], bool *UnboundedRequestEnabled, unsigned int *CompressedBufferSizeInkByte, + unsigned int *CompBufReservedSpaceKBytes, + bool *CompBufReservedSpaceNeedAdjustment, bool ViewportSizeSupportPerSurface[], bool *ViewportSizeSupport) { @@ -465,6 +470,8 @@ void dml32_CalculateSwathAndDETConfiguration( #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP); + dml_print("DML::%s: ROBSizeKBytes = %d\n", __func__, ROBSizeKBytes); + dml_print("DML::%s: PixelChunkSizeKBytes = %d\n", __func__, PixelChunkSizeKBytes); #endif dml32_CalculateSwathWidth(ForceSingleDPP, NumberOfActiveSurfaces, @@ -534,8 +541,24 @@ void dml32_CalculateSwathAndDETConfiguration( } } - *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, - NoChromaSurfaces, Output[0]); + // By default, just set the reserved space to 2 pixel chunks size + *CompBufReservedSpaceKBytes = PixelChunkSizeKBytes * 2; + + // if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data + // - assume worst-case compression rate of 4. 
[ROB size - 8 * swath_size / max_compression ratio] + // - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req + *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (RoundedUpMaxSwathSizeBytesY[0]/512); + + if (*CompBufReservedSpaceNeedAdjustment == 1) { + *CompBufReservedSpaceKBytes = ROBSizeKBytes - RoundedUpMaxSwathSizeBytesY[0]/512; + } + + #ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: CompBufReservedSpaceKBytes = %d\n", __func__, *CompBufReservedSpaceKBytes); + dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, *CompBufReservedSpaceNeedAdjustment); + #endif + + *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment); dml32_CalculateDETBufferSize(DETSizeOverride, UseMALLForPStateChange, @@ -853,9 +876,12 @@ void dml32_CalculateSwathWidth( } // CalculateSwathWidth bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, - unsigned int TotalNumberOfActiveDPP, - bool NoChroma, - enum output_encoder_class Output) + unsigned int TotalNumberOfActiveDPP, + bool NoChroma, + enum output_encoder_class Output, + enum dm_swizzle_mode SurfaceTiling, + bool CompBufReservedSpaceNeedAdjustment, + bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment) { bool ret_val = false; @@ -863,7 +889,20 @@ bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequest TotalNumberOfActiveDPP == 1 && NoChroma); if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp) ret_val = false; - return ret_val; + + if (SurfaceTiling == dm_sw_linear) + ret_val = false; + + if (CompBufReservedSpaceNeedAdjustment == 1 && DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment) + ret_val = false; + 
+#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, CompBufReservedSpaceNeedAdjustment); + dml_print("DML::%s: DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment = %d\n", __func__, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment); + dml_print("DML::%s: ret_val = %d\n", __func__, ret_val); +#endif + + return (ret_val); } void dml32_CalculateDETBufferSize( diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h index 72461b934ee0..d293856ba906 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h @@ -90,6 +90,9 @@ void dml32_CalculateSwathAndDETConfiguration( unsigned int NumberOfActiveSurfaces, unsigned int nomDETInKByte, enum unbounded_requesting_policy UseUnboundedRequestingFinal, + bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment, + unsigned int PixelChunkSizeKBytes, + unsigned int ROBSizeKBytes, unsigned int CompressedBufferSegmentSizeInkByteFinal, enum output_encoder_class Output[], double ReadBandwidthLuma[], @@ -137,6 +140,8 @@ void dml32_CalculateSwathAndDETConfiguration( unsigned int DETBufferSizeC[], bool *UnboundedRequestEnabled, unsigned int *CompressedBufferSizeInkByte, + unsigned int *CompBufReservedSpaceKBytes, + bool *CompBufReservedSpaceNeedAdjustment, bool ViewportSizeSupportPerSurface[], bool *ViewportSizeSupport); @@ -181,7 +186,10 @@ void dml32_CalculateSwathWidth( bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, unsigned int TotalNumberOfActiveDPP, bool NoChroma, - enum output_encoder_class Output); + enum output_encoder_class Output, + enum dm_swizzle_mode SurfaceTiling, + bool CompBufReservedSpaceNeedAdjustment, + bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment); void dml32_CalculateDETBufferSize( unsigned int 
DETSizeOverride[], diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c index de78bb8489cb..503e7d984ff0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c @@ -111,6 +111,7 @@ dml_get_attr_func(tcalc, mode_lib->vba.TCalc); dml_get_attr_func(fraction_of_urgent_bandwidth, mode_lib->vba.FractionOfUrgentBandwidth); dml_get_attr_func(fraction_of_urgent_bandwidth_imm_flip, mode_lib->vba.FractionOfUrgentBandwidthImmediateFlip); + dml_get_attr_func(cstate_max_cap_mode, mode_lib->vba.DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE); dml_get_attr_func(comp_buffer_size_kbytes, mode_lib->vba.CompressedBufferSizeInkByte); dml_get_attr_func(pixel_chunk_size_in_kbyte, mode_lib->vba.PixelChunkSizeInKByte); -- cgit v1.2.3 From 26749aa8d1261bd6f2db9d019276d4277dde7df8 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Thu, 7 Jul 2022 17:17:25 -0400 Subject: drm/amd/display: Revert "drm/amd/display: disable idle optimizations" This reverts commit e7ef5569e71bf3fec01ea513c27c6081c0dbbc64. Idle optimization was disabled due to SMU and firmware bugs. Enable it back for DCN32. 
DCN321 has them enabled already Fixes: 6a640b95b061 ("drm/amd/display: disable idle optimizations") Signed-off-by: Aurabindo Pillai Reviewed-and-tested-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index 39214a0dcdf2..1b499f42f459 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -972,7 +972,6 @@ static const struct dc_debug_options debug_defaults_drv = { .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = false, - .disable_idle_power_optimizations = true, .pipe_split_policy = MPC_SPLIT_DYNAMIC, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, -- cgit v1.2.3 From 041a11095abdd52b38f1ea1355357ecd2b66c0e2 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 7 Jul 2022 10:11:08 -0400 Subject: drm/amd/display: Drop FPU flags from dcn32_clk_mgr We are working to isolate FPU operations inside the DML folder, and the file dcn32_clk_mgr has some of these operations. This commit moves the FPU operations inside the clock manager and creates the dcn32_fpu file to aggregate those operations. Note that there is no functional change here, just moving code from one part to another.
Tested-by: Daniel Wheeler Reviewed-by: Harry Wentland Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile | 25 ----- .../amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c | 81 +-------------- drivers/gpu/drm/amd/display/dc/dml/Makefile | 2 + .../gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 113 +++++++++++++++++++++ .../gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h | 34 +++++++ 5 files changed, 153 insertions(+), 102 deletions(-) create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile index 053084121db2..a48453612d10 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile @@ -188,31 +188,6 @@ CLK_MGR_DCN32 = dcn32_clk_mgr.o dcn32_clk_mgr_smu_msg.o AMD_DAL_CLK_MGR_DCN32 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn32/,$(CLK_MGR_DCN32)) -ifdef CONFIG_X86 -CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn32/dcn32_clk_mgr.o := -mhard-float -msse -endif - -ifdef CONFIG_PPC64 -CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn32/dcn32_clk_mgr.o := -mhard-float -maltivec -endif - -ifdef CONFIG_CC_IS_GCC -ifeq ($(call cc-ifversion, -lt, 0701, y), y) -IS_OLD_GCC = 1 -endif -endif - -ifdef CONFIG_X86 -ifdef IS_OLD_GCC -# Stack alignment mismatch, proceed with caution. -# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 -# (8B stack alignment). 
-CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn32/dcn32_clk_mgr.o := -mpreferred-stack-boundary=4 -else -CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn32/dcn32_clk_mgr.o := -msse2 -endif -endif - AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN32) endif diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c index 5b87f937554d..c6785969eb1a 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c @@ -42,6 +42,7 @@ #include "dcn/dcn_3_2_0_sh_mask.h" #include "dcn32/dcn32_clk_mgr.h" +#include "dml/dcn32/dcn32_fpu.h" #define DCN_BASE__INST0_SEG1 0x000000C0 @@ -146,83 +147,9 @@ static void dcn32_init_single_clock(struct clk_mgr_internal *clk_mgr, PPCLK_e cl static void dcn32_build_wm_range_table(struct clk_mgr_internal *clk_mgr) { - /* defaults */ - double pstate_latency_us = clk_mgr->base.ctx->dc->dml.soc.dram_clock_change_latency_us; - double fclk_change_latency_us = clk_mgr->base.ctx->dc->dml.soc.fclk_change_latency_us; - double sr_exit_time_us = clk_mgr->base.ctx->dc->dml.soc.sr_exit_time_us; - double sr_enter_plus_exit_time_us = clk_mgr->base.ctx->dc->dml.soc.sr_enter_plus_exit_time_us; - /* For min clocks use as reported by PM FW and report those as min */ - uint16_t min_uclk_mhz = clk_mgr->base.bw_params->clk_table.entries[0].memclk_mhz; - uint16_t min_dcfclk_mhz = clk_mgr->base.bw_params->clk_table.entries[0].dcfclk_mhz; - uint16_t setb_min_uclk_mhz = min_uclk_mhz; - uint16_t dcfclk_mhz_for_the_second_state = clk_mgr->base.ctx->dc->dml.soc.clock_limits[2].dcfclk_mhz; - - /* For Set B ranges use min clocks state 2 when available, and report those to PM FW */ - if (dcfclk_mhz_for_the_second_state) - clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = dcfclk_mhz_for_the_second_state; - else - clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = 
clk_mgr->base.bw_params->clk_table.entries[0].dcfclk_mhz; - - if (clk_mgr->base.bw_params->clk_table.entries[2].memclk_mhz) - setb_min_uclk_mhz = clk_mgr->base.bw_params->clk_table.entries[2].memclk_mhz; - - /* Set A - Normal - default values */ - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].valid = true; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us = pstate_latency_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.fclk_change_latency_us = fclk_change_latency_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us = sr_exit_time_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_dcfclk = 0xFFFF; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_uclk = min_uclk_mhz; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_uclk = 0xFFFF; - - /* Set B - Performance - higher clocks, using DPM[2] DCFCLK and UCLK */ - clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].valid = true; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us = pstate_latency_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.fclk_change_latency_us = fclk_change_latency_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us = sr_exit_time_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_dcfclk = 0xFFFF; - 
clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_uclk = setb_min_uclk_mhz; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_uclk = 0xFFFF; - - /* Set C - Dummy P-State - P-State latency set to "dummy p-state" value */ - /* 'DalDummyClockChangeLatencyNs' registry key option set to 0x7FFFFFFF can be used to disable Set C for dummy p-state */ - if (clk_mgr->base.ctx->dc->bb_overrides.dummy_clock_change_latency_ns != 0x7FFFFFFF) { - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].valid = true; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 38; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.fclk_change_latency_us = fclk_change_latency_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us = sr_exit_time_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.wm_type = WATERMARKS_DUMMY_PSTATE; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_dcfclk = 0xFFFF; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_uclk = min_uclk_mhz; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_uclk = 0xFFFF; - clk_mgr->base.bw_params->dummy_pstate_table[0].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[0].memclk_mhz * 16; - clk_mgr->base.bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 38; - clk_mgr->base.bw_params->dummy_pstate_table[1].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[1].memclk_mhz * 16; - clk_mgr->base.bw_params->dummy_pstate_table[1].dummy_pstate_latency_us = 9; - clk_mgr->base.bw_params->dummy_pstate_table[2].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[2].memclk_mhz * 16; - 
clk_mgr->base.bw_params->dummy_pstate_table[2].dummy_pstate_latency_us = 8; - clk_mgr->base.bw_params->dummy_pstate_table[3].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[3].memclk_mhz * 16; - clk_mgr->base.bw_params->dummy_pstate_table[3].dummy_pstate_latency_us = 5; - } - /* Set D - MALL - SR enter and exit time specific to MALL, TBD after bringup or later phase for now use DRAM values / 2 */ - /* For MALL DRAM clock change latency is N/A, for watermak calculations use lowest value dummy P state latency */ - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].valid = true; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us = clk_mgr->base.bw_params->dummy_pstate_table[3].dummy_pstate_latency_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.fclk_change_latency_us = fclk_change_latency_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us = sr_exit_time_us; // TBD - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; // TBD - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.wm_type = WATERMARKS_MALL; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_dcfclk = 0xFFFF; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_uclk = min_uclk_mhz; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_uclk = 0xFFFF; + DC_FP_START(); + dcn32_build_wm_range_table_fpu(clk_mgr); + DC_FP_END(); } void dcn32_init_clocks(struct clk_mgr *clk_mgr_base) diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index c48688cdd7f7..01cb0ef3a2b0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -72,6 +72,7 @@ 
CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/dcn30_fpu.o := $(dml_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/dcn32_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_32.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_rq_dlg_calc_32.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_util_32.o := $(dml_ccflags) @@ -124,6 +125,7 @@ DML += dcn30/dcn30_fpu.o dcn30/display_mode_vba_30.o dcn30/display_rq_dlg_calc_3 DML += dcn31/display_mode_vba_31.o dcn31/display_rq_dlg_calc_31.o DML += dcn32/display_mode_vba_32.o dcn32/display_rq_dlg_calc_32.o dcn32/display_mode_vba_util_32.o DML += dcn31/dcn31_fpu.o +DML += dcn32/dcn32_fpu.o DML += dcn301/dcn301_fpu.o DML += dcn302/dcn302_fpu.o DML += dcn303/dcn303_fpu.o diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c new file mode 100644 index 000000000000..89b596599c3d --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ +#include "dcn32_fpu.h" + +// We need this includes for WATERMARKS_* defines +#include "clk_mgr/dcn32/dcn32_smu13_driver_if.h" + +void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr) +{ + /* defaults */ + double pstate_latency_us = clk_mgr->base.ctx->dc->dml.soc.dram_clock_change_latency_us; + double fclk_change_latency_us = clk_mgr->base.ctx->dc->dml.soc.fclk_change_latency_us; + double sr_exit_time_us = clk_mgr->base.ctx->dc->dml.soc.sr_exit_time_us; + double sr_enter_plus_exit_time_us = clk_mgr->base.ctx->dc->dml.soc.sr_enter_plus_exit_time_us; + /* For min clocks use as reported by PM FW and report those as min */ + uint16_t min_uclk_mhz = clk_mgr->base.bw_params->clk_table.entries[0].memclk_mhz; + uint16_t min_dcfclk_mhz = clk_mgr->base.bw_params->clk_table.entries[0].dcfclk_mhz; + uint16_t setb_min_uclk_mhz = min_uclk_mhz; + uint16_t dcfclk_mhz_for_the_second_state = clk_mgr->base.ctx->dc->dml.soc.clock_limits[2].dcfclk_mhz; + + dc_assert_fp_enabled(); + + /* For Set B ranges use min clocks state 2 when available, and report those to PM FW */ + if (dcfclk_mhz_for_the_second_state) + clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = dcfclk_mhz_for_the_second_state; + else + clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = clk_mgr->base.bw_params->clk_table.entries[0].dcfclk_mhz; + + if (clk_mgr->base.bw_params->clk_table.entries[2].memclk_mhz) + setb_min_uclk_mhz = 
clk_mgr->base.bw_params->clk_table.entries[2].memclk_mhz; + + /* Set A - Normal - default values */ + clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].valid = true; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us = pstate_latency_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.fclk_change_latency_us = fclk_change_latency_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us = sr_exit_time_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_dcfclk = 0xFFFF; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_uclk = min_uclk_mhz; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_uclk = 0xFFFF; + + /* Set B - Performance - higher clocks, using DPM[2] DCFCLK and UCLK */ + clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].valid = true; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us = pstate_latency_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.fclk_change_latency_us = fclk_change_latency_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us = sr_exit_time_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_dcfclk = 0xFFFF; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_uclk = setb_min_uclk_mhz; + 
clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_uclk = 0xFFFF; + + /* Set C - Dummy P-State - P-State latency set to "dummy p-state" value */ + /* 'DalDummyClockChangeLatencyNs' registry key option set to 0x7FFFFFFF can be used to disable Set C for dummy p-state */ + if (clk_mgr->base.ctx->dc->bb_overrides.dummy_clock_change_latency_ns != 0x7FFFFFFF) { + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].valid = true; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 38; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.fclk_change_latency_us = fclk_change_latency_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us = sr_exit_time_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.wm_type = WATERMARKS_DUMMY_PSTATE; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_dcfclk = 0xFFFF; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_uclk = min_uclk_mhz; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_uclk = 0xFFFF; + clk_mgr->base.bw_params->dummy_pstate_table[0].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[0].memclk_mhz * 16; + clk_mgr->base.bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 38; + clk_mgr->base.bw_params->dummy_pstate_table[1].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[1].memclk_mhz * 16; + clk_mgr->base.bw_params->dummy_pstate_table[1].dummy_pstate_latency_us = 9; + clk_mgr->base.bw_params->dummy_pstate_table[2].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[2].memclk_mhz * 16; + clk_mgr->base.bw_params->dummy_pstate_table[2].dummy_pstate_latency_us = 8; + 
clk_mgr->base.bw_params->dummy_pstate_table[3].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[3].memclk_mhz * 16; + clk_mgr->base.bw_params->dummy_pstate_table[3].dummy_pstate_latency_us = 5; + } + /* Set D - MALL - SR enter and exit time specific to MALL, TBD after bringup or later phase for now use DRAM values / 2 */ + /* For MALL DRAM clock change latency is N/A, for watermak calculations use lowest value dummy P state latency */ + clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].valid = true; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us = clk_mgr->base.bw_params->dummy_pstate_table[3].dummy_pstate_latency_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.fclk_change_latency_us = fclk_change_latency_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us = sr_exit_time_us / 2; // TBD + clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us / 2; // TBD + clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.wm_type = WATERMARKS_MALL; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_dcfclk = 0xFFFF; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_uclk = min_uclk_mhz; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_uclk = 0xFFFF; +} + diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h new file mode 100644 index 000000000000..72a6dd75af0e --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2022 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DCN32_FPU_H__ +#define __DCN32_FPU_H__ + +#include "clk_mgr_internal.h" + +void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr); + +#endif -- cgit v1.2.3 From af14e7c2fc9b60af70b410a7dace116eaa5e4e65 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 5 May 2022 18:09:11 -0400 Subject: drm/amdgpu: add the IP discovery IP versions for HW INFO data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the former pad element to store the IP versions from the IP discovery table. This allows userspace to get the IP version from the kernel to better align with hardware IP versions. 
Proposed mesa patch: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17411/diffs?commit_id=c8a63590dfd0d64e6e6a634dcfed993f135dd075 Reviewed-by: Marek Olšák Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 24 ++++++++++++++++++++++++ include/uapi/drm/amdgpu_drm.h | 3 ++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index b8ba59c93fc0..1369c25448dc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -461,6 +461,30 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, result->hw_ip_version_major = adev->ip_blocks[i].version->major; result->hw_ip_version_minor = adev->ip_blocks[i].version->minor; + + if (adev->asic_type >= CHIP_VEGA10) { + switch (type) { + case AMD_IP_BLOCK_TYPE_GFX: + result->ip_discovery_version = adev->ip_versions[GC_HWIP][0]; + break; + case AMD_IP_BLOCK_TYPE_SDMA: + result->ip_discovery_version = adev->ip_versions[SDMA0_HWIP][0]; + break; + case AMD_IP_BLOCK_TYPE_UVD: + case AMD_IP_BLOCK_TYPE_VCN: + case AMD_IP_BLOCK_TYPE_JPEG: + result->ip_discovery_version = adev->ip_versions[UVD_HWIP][0]; + break; + case AMD_IP_BLOCK_TYPE_VCE: + result->ip_discovery_version = adev->ip_versions[VCE_HWIP][0]; + break; + default: + result->ip_discovery_version = 0; + break; + } + } else { + result->ip_discovery_version = 0; + } result->capabilities_flags = 0; result->available_rings = (1 << num_rings) - 1; result->ib_start_alignment = ib_start_alignment; diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 63de71f53110..c2c9c674a223 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1097,7 +1097,8 @@ struct drm_amdgpu_info_hw_ip { __u32 ib_size_alignment; /** Bitmask of available rings. Bit 0 means ring 0, etc. 
*/ __u32 available_rings; - __u32 _pad; + /** version info: bits 23:16 major, 15:8 minor, 7:0 revision */ + __u32 ip_discovery_version; }; struct drm_amdgpu_info_num_handles { -- cgit v1.2.3 From 465576ca481caa9cf18550652efdc6b103701c68 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 20 May 2022 10:59:35 -0400 Subject: drm/amdgpu: bump driver version for IP discovery info in HW INFO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So userspace knows when it is available. Proposed mesa patch: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17411/diffs?commit_id=c8a63590dfd0d64e6e6a634dcfed993f135dd075 Reviewed-by: Marek Olšák Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 52d1fd7d8e81..30f8c46f16b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -100,10 +100,11 @@ * - 3.44.0 - DCN3 supports DCC independent block settings: !64B && 128B, 64B && 128B * - 3.45.0 - Add context ioctl stable pstate interface * - 3.46.0 - To enable hot plug amdgpu tests in libdrm - * * 3.47.0 - Add AMDGPU_GEM_CREATE_DISCARDABLE and AMDGPU_VM_NOALLOC flags + * - 3.47.0 - Add AMDGPU_GEM_CREATE_DISCARDABLE and AMDGPU_VM_NOALLOC flags + * - 3.48.0 - Add IP discovery version info to HW INFO */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 47 +#define KMS_DRIVER_MINOR 48 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit; -- cgit v1.2.3 From f7bacd97af853a9bba9bb8d1baa12f997e60122f Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 7 Jul 2022 16:03:40 -0400 Subject: drm/amd/display: Move populate phantom function to dml The function dcn32_helper_populate_phantom_dlg_params uses FPU operations.
For this reason, this commit moves this function to the dcn32_fpu file, and we ensure that we only invoke it under the kernel_fpu protection. Tested-by: Daniel Wheeler Reviewed-by: Harry Wentland Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 7 ++++ .../amd/display/dc/dcn32/dcn32_resource_helpers.c | 44 ---------------------- .../gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 43 +++++++++++++++++++++ .../gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h | 5 +++ 4 files changed, 55 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index 1b499f42f459..efbae88c492a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -88,6 +88,7 @@ #include "dml/dcn30/display_mode_vba_30.h" #include "vm_helper.h" #include "dcn20/dcn20_vmid.h" +#include "dml/dcn32/dcn32_fpu.h" #define DCN_BASE__INST0_SEG1 0x000000C0 #define DCN_BASE__INST0_SEG2 0x000034C0 @@ -312,6 +313,7 @@ enum dcn32_clk_src_array_id { .reg_name = NBIO_BASE(regBIF_BX0_ ## reg_name ## _BASE_IDX) + \ regBIF_BX0_ ## reg_name +#undef CTX #define CTX ctx #define REG(reg_name) \ (DCN_BASE.instance[0].segment[reg ## reg_name ## _BASE_IDX] + reg ## reg_name) @@ -2666,6 +2668,11 @@ static void dcn32_full_validate_bw_helper(struct dc *dc, memset(merge, 0, MAX_PIPES * sizeof(bool)); *vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, *vlevel, split, merge); + // Must populate phantom DLG params before programming hardware / timing for phantom pipe + DC_FP_START(); + dcn32_helper_populate_phantom_dlg_params(dc, context, pipes, *pipe_cnt); + DC_FP_END(); + // Note: We can't apply the phantom pipes to hardware at this time. We have to wait // until driver has acquired the DMCUB lock to do it safely.
} diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index a6ef1dba01fe..633d3ee18cfa 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -28,50 +28,6 @@ #include "dcn20/dcn20_resource.h" #include "dml/dcn32/display_mode_vba_util_32.h" -/** - * ******************************************************************************************** - * dcn32_helper_populate_phantom_dlg_params: Get DLG params for phantom pipes and populate pipe_ctx - * with those params. - * - * This function must be called AFTER the phantom pipes are added to context and run through DML - * (so that the DLG params for the phantom pipes can be populated), and BEFORE we program the - * timing for the phantom pipes. - * - * @param [in] dc: current dc state - * @param [in] context: new dc state - * @param [in] pipes: DML pipe params array - * @param [in] pipe_cnt: DML pipe count - * - * @return: void - * - * ******************************************************************************************** - */ -void dcn32_helper_populate_phantom_dlg_params(struct dc *dc, - struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int pipe_cnt) -{ - uint32_t i, pipe_idx; - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - if (!pipe->stream) - continue; - - if (pipe->plane_state && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { - pipes[pipe_idx].pipe.dest.vstartup_start = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, - pipe_idx); - pipes[pipe_idx].pipe.dest.vupdate_offset = get_vupdate_offset(&context->bw_ctx.dml, pipes, pipe_cnt, - pipe_idx); - pipes[pipe_idx].pipe.dest.vupdate_width = get_vupdate_width(&context->bw_ctx.dml, pipes, pipe_cnt, - pipe_idx); - pipes[pipe_idx].pipe.dest.vready_offset = 
get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt, - pipe_idx); - pipe->pipe_dlg_param = pipes[pipe_idx].pipe.dest; - } - pipe_idx++; - } -} - /** * ******************************************************************************************** * dcn32_helper_calculate_num_ways_for_subvp: Calculate number of ways needed for SubVP diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 89b596599c3d..253ff9659b0d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -111,3 +111,46 @@ void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr) clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_uclk = 0xFFFF; } +/** + * dcn32_helper_populate_phantom_dlg_params - Get DLG params for phantom pipes + * and populate pipe_ctx with those params. + * + * This function must be called AFTER the phantom pipes are added to context + * and run through DML (so that the DLG params for the phantom pipes can be + * populated), and BEFORE we program the timing for the phantom pipes. 
+ * + * @dc: [in] current dc state + * @context: [in] new dc state + * @pipes: [in] DML pipe params array + * @pipe_cnt: [in] DML pipe count + */ +void dcn32_helper_populate_phantom_dlg_params(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt) +{ + uint32_t i, pipe_idx; + + dc_assert_fp_enabled(); + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (!pipe->stream) + continue; + + if (pipe->plane_state && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { + pipes[pipe_idx].pipe.dest.vstartup_start = + get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + pipes[pipe_idx].pipe.dest.vupdate_offset = + get_vupdate_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + pipes[pipe_idx].pipe.dest.vupdate_width = + get_vupdate_width(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + pipes[pipe_idx].pipe.dest.vready_offset = + get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + pipe->pipe_dlg_param = pipes[pipe_idx].pipe.dest; + } + pipe_idx++; + } +} + diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h index 72a6dd75af0e..492f99b6d561 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h @@ -31,4 +31,9 @@ void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr); +void dcn32_helper_populate_phantom_dlg_params(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt); + #endif -- cgit v1.2.3 From 792a0cdde34d417bc2c8266d8015c5fd58d44a0d Mon Sep 17 00:00:00 2001 From: Leo Li Date: Wed, 6 Jul 2022 14:48:52 -0400 Subject: drm/amd/display: Add visualconfirm module parameter [Why] Being able to configure visual confirm at boot or in cmdline is helpful when debugging. 
[How] Add a module parameter to configure DC visual confirm, which works the same way as the equivalent debugfs entry. Signed-off-by: Leo Li Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 ++++ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 ++ 3 files changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 3aa8ae1bc35a..b075845a5328 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -197,6 +197,7 @@ extern uint amdgpu_smu_memory_pool_size; extern int amdgpu_smu_pptable_id; extern uint amdgpu_dc_feature_mask; extern uint amdgpu_dc_debug_mask; +extern uint amdgpu_dc_visual_confirm; extern uint amdgpu_dm_abm_level; extern int amdgpu_backlight; extern struct amdgpu_mgpu_info mgpu_info; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 30f8c46f16b4..429fcdf28836 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -168,6 +168,7 @@ int amdgpu_smu_pptable_id = -1; */ uint amdgpu_dc_feature_mask = 2; uint amdgpu_dc_debug_mask; +uint amdgpu_dc_visual_confirm; int amdgpu_async_gfx_ring = 1; int amdgpu_mcbp; int amdgpu_discovery = -1; @@ -828,6 +829,9 @@ module_param_named(dcfeaturemask, amdgpu_dc_feature_mask, uint, 0444); MODULE_PARM_DESC(dcdebugmask, "all debug options disabled (default))"); module_param_named(dcdebugmask, amdgpu_dc_debug_mask, uint, 0444); +MODULE_PARM_DESC(visualconfirm, "Visual confirm (0 = off (default), 1 = MPO, 5 = PSR)"); +module_param_named(visualconfirm, amdgpu_dc_visual_confirm, uint, 0444); + /** * DOC: abmlevel (uint) * Override the default ABM (Adaptive Backlight Management) level used for DC diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index a9f9c929dca6..22a3f8972705 
100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1538,6 +1538,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) if (amdgpu_dc_debug_mask & DC_FORCE_SUBVP_MCLK_SWITCH) adev->dm.dc->debug.force_subvp_mclk_switch = true; + adev->dm.dc->debug.visual_confirm = amdgpu_dc_visual_confirm; + r = dm_dmub_hw_init(adev); if (r) { DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r); -- cgit v1.2.3 From 8813381a62e1f1703f8fbeccc5fa4fcc988be882 Mon Sep 17 00:00:00 2001 From: Leo Li Date: Wed, 6 Jul 2022 14:56:28 -0400 Subject: drm/amd/display: Add dcdebugmask option for disabling MPO [Why & How] It's useful to disable MPO when debugging or testing. Therefore, add a dcdebugmask option to disable MPO. Signed-off-by: Leo Li Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++++ drivers/gpu/drm/amd/include/amd_shared.h | 1 + 2 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 22a3f8972705..8660d93cc405 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4197,6 +4197,10 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) for (i = 0; i < dm->dc->caps.max_planes; ++i) { struct dc_plane_cap *plane = &dm->dc->caps.planes[i]; + /* Do not create overlay if MPO disabled */ + if (amdgpu_dc_debug_mask & DC_DISABLE_MPO) + break; + if (plane->type != DC_PLANE_TYPE_DCN_UNIVERSAL) continue; diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 1db21d13726d..f175e65b853a 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -249,6 +249,7 @@ enum DC_DEBUG_MASK { DC_DISABLE_CLOCK_GATING = 0x8, DC_DISABLE_PSR = 0x10, DC_FORCE_SUBVP_MCLK_SWITCH = 0x20, + 
DC_DISABLE_MPO = 0x40, }; enum amd_dpm_forced_level; -- cgit v1.2.3 From ccc4200cfb2518fea042b16f090962b07314439b Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Tue, 21 Jun 2022 14:06:56 +0800 Subject: drm/pm/swsmu: add ras eeprom i2c function for smu13 v13_0_0 Add ras eeprom i2c function for smu13 v13_0_0. Signed-off-by: YiPeng Chai Acked-by: Evan Quan Acked-by: Alex Deucher Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 157 +++++++++++++++++++++ 1 file changed, 157 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index ce2fa04e3926..2b83191e8006 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -196,6 +196,7 @@ static struct cmn2asic_mapping smu_v13_0_0_table_map[SMU_TABLE_COUNT] = { TAB_MAP(DRIVER_SMU_CONFIG), TAB_MAP(ACTIVITY_MONITOR_COEFF), [SMU_TABLE_COMBO_PPTABLE] = {1, TABLE_COMBO_PPTABLE}, + TAB_MAP(I2C_COMMANDS), }; static struct cmn2asic_mapping smu_v13_0_0_pwr_src_map[SMU_POWER_SOURCE_COUNT] = { @@ -1606,9 +1607,165 @@ static bool smu_v13_0_0_is_mode1_reset_supported(struct smu_context *smu) return true; } +static int smu_v13_0_0_i2c_xfer(struct i2c_adapter *i2c_adap, + struct i2c_msg *msg, int num_msgs) +{ + struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(i2c_adap); + struct amdgpu_device *adev = smu_i2c->adev; + struct smu_context *smu = adev->powerplay.pp_handle; + struct smu_table_context *smu_table = &smu->smu_table; + struct smu_table *table = &smu_table->driver_table; + SwI2cRequest_t *req, *res = (SwI2cRequest_t *)table->cpu_addr; + int i, j, r, c; + u16 dir; + + if (!adev->pm.dpm_enabled) + return -EBUSY; + + req = kzalloc(sizeof(*req), GFP_KERNEL); + if (!req) + return -ENOMEM; + + req->I2CcontrollerPort = smu_i2c->port; + req->I2CSpeed = I2C_SPEED_FAST_400K; + req->SlaveAddress = msg[0].addr << 1; /* wants an 
8-bit address */ + dir = msg[0].flags & I2C_M_RD; + + for (c = i = 0; i < num_msgs; i++) { + for (j = 0; j < msg[i].len; j++, c++) { + SwI2cCmd_t *cmd = &req->SwI2cCmds[c]; + + if (!(msg[i].flags & I2C_M_RD)) { + /* write */ + cmd->CmdConfig |= CMDCONFIG_READWRITE_MASK; + cmd->ReadWriteData = msg[i].buf[j]; + } + + if ((dir ^ msg[i].flags) & I2C_M_RD) { + /* The direction changes. + */ + dir = msg[i].flags & I2C_M_RD; + cmd->CmdConfig |= CMDCONFIG_RESTART_MASK; + } + + req->NumCmds++; + + /* + * Insert STOP if we are at the last byte of either last + * message for the transaction or the client explicitly + * requires a STOP at this particular message. + */ + if ((j == msg[i].len - 1) && + ((i == num_msgs - 1) || (msg[i].flags & I2C_M_STOP))) { + cmd->CmdConfig &= ~CMDCONFIG_RESTART_MASK; + cmd->CmdConfig |= CMDCONFIG_STOP_MASK; + } + } + } + mutex_lock(&adev->pm.mutex); + r = smu_cmn_update_table(smu, SMU_TABLE_I2C_COMMANDS, 0, req, true); + mutex_unlock(&adev->pm.mutex); + if (r) + goto fail; + + for (c = i = 0; i < num_msgs; i++) { + if (!(msg[i].flags & I2C_M_RD)) { + c += msg[i].len; + continue; + } + for (j = 0; j < msg[i].len; j++, c++) { + SwI2cCmd_t *cmd = &res->SwI2cCmds[c]; + + msg[i].buf[j] = cmd->ReadWriteData; + } + } + r = num_msgs; +fail: + kfree(req); + return r; +} + +static u32 smu_v13_0_0_i2c_func(struct i2c_adapter *adap) +{ + return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL; +} + +static const struct i2c_algorithm smu_v13_0_0_i2c_algo = { + .master_xfer = smu_v13_0_0_i2c_xfer, + .functionality = smu_v13_0_0_i2c_func, +}; + +static const struct i2c_adapter_quirks smu_v13_0_0_i2c_control_quirks = { + .flags = I2C_AQ_COMB | I2C_AQ_COMB_SAME_ADDR | I2C_AQ_NO_ZERO_LEN, + .max_read_len = MAX_SW_I2C_COMMANDS, + .max_write_len = MAX_SW_I2C_COMMANDS, + .max_comb_1st_msg_len = 2, + .max_comb_2nd_msg_len = MAX_SW_I2C_COMMANDS - 2, +}; + +static int smu_v13_0_0_i2c_control_init(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + int res, i; + 
+ for (i = 0; i < MAX_SMU_I2C_BUSES; i++) { + struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i]; + struct i2c_adapter *control = &smu_i2c->adapter; + + smu_i2c->adev = adev; + smu_i2c->port = i; + mutex_init(&smu_i2c->mutex); + control->owner = THIS_MODULE; + control->class = I2C_CLASS_SPD; + control->dev.parent = &adev->pdev->dev; + control->algo = &smu_v13_0_0_i2c_algo; + snprintf(control->name, sizeof(control->name), "AMDGPU SMU %d", i); + control->quirks = &smu_v13_0_0_i2c_control_quirks; + i2c_set_adapdata(control, smu_i2c); + + res = i2c_add_adapter(control); + if (res) { + DRM_ERROR("Failed to register hw i2c, err: %d\n", res); + goto Out_err; + } + } + + /* assign the buses used for the FRU EEPROM and RAS EEPROM */ + /* XXX ideally this would be something in a vbios data table */ + adev->pm.ras_eeprom_i2c_bus = &adev->pm.smu_i2c[1].adapter; + adev->pm.fru_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter; + + return 0; +Out_err: + for ( ; i >= 0; i--) { + struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i]; + struct i2c_adapter *control = &smu_i2c->adapter; + + i2c_del_adapter(control); + } + return res; +} + +static void smu_v13_0_0_i2c_control_fini(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + int i; + + for (i = 0; i < MAX_SMU_I2C_BUSES; i++) { + struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i]; + struct i2c_adapter *control = &smu_i2c->adapter; + + i2c_del_adapter(control); + } + adev->pm.ras_eeprom_i2c_bus = NULL; + adev->pm.fru_eeprom_i2c_bus = NULL; +} + static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .get_allowed_feature_mask = smu_v13_0_0_get_allowed_feature_mask, .set_default_dpm_table = smu_v13_0_0_set_default_dpm_table, + .i2c_init = smu_v13_0_0_i2c_control_init, + .i2c_fini = smu_v13_0_0_i2c_control_fini, .is_dpm_running = smu_v13_0_0_is_dpm_running, .dump_pptable = smu_v13_0_0_dump_pptable, .init_microcode = smu_v13_0_init_microcode, -- cgit v1.2.3 From 
25e751642a38204da189e0e239055702caeb461c Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 7 Jul 2022 17:15:19 -0400 Subject: drm/amd/display: Move predict pipe to dml fpu folder The function dcn32_predict_pipe_split uses FPU operations. This commit moves this function to the dcn32_fpu file, and we ensure that we only invoke it under the kernel_fpu protection. Tested-by: Daniel Wheeler Reviewed-by: Harry Wentland Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 2 ++ .../gpu/drm/amd/display/dc/dcn32/dcn32_resource.h | 2 -- .../amd/display/dc/dcn32/dcn32_resource_helpers.c | 33 ------------------ .../gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 39 +++++++++++++++++++++- .../gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h | 4 +++ 5 files changed, 44 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index efbae88c492a..c5ba411d12ba 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -3053,7 +3053,9 @@ int dcn32_populate_dml_pipes_from_context( pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_2to1; } + DC_FP_START(); is_pipe_split_expected[i] = dcn32_predict_pipe_split(context, pipes[i].pipe, i); + DC_FP_END(); pipe_cnt++; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h index 10254ab7e9d9..901aa7e13bd2 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h @@ -100,8 +100,6 @@ bool dcn32_all_pipes_have_stream_and_plane(struct dc *dc, bool dcn32_subvp_in_use(struct dc *dc, struct dc_state *context); -bool dcn32_predict_pipe_split(struct dc_state *context, display_pipe_params_st pipe, int index); - void dcn32_determine_det_override(struct dc_state 
*context, display_e2e_pipe_params_st *pipes, bool *is_pipe_split_expected, int pipe_cnt); diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index 633d3ee18cfa..796e3d966a76 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -153,39 +153,6 @@ bool dcn32_subvp_in_use(struct dc *dc, return false; } -bool dcn32_predict_pipe_split(struct dc_state *context, display_pipe_params_st pipe, int index) -{ - double pscl_throughput, pscl_throughput_chroma, dpp_clk_single_dpp, clock, - clk_frequency = 0.0, vco_speed = context->bw_ctx.dml.soc.dispclk_dppclk_vco_speed_mhz; - - dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(pipe.scale_ratio_depth.hscl_ratio, - pipe.scale_ratio_depth.hscl_ratio_c, - pipe.scale_ratio_depth.vscl_ratio, - pipe.scale_ratio_depth.vscl_ratio_c, - context->bw_ctx.dml.ip.max_dchub_pscl_bw_pix_per_clk, - context->bw_ctx.dml.ip.max_pscl_lb_bw_pix_per_clk, - pipe.dest.pixel_rate_mhz, - pipe.src.source_format, - pipe.scale_taps.htaps, - pipe.scale_taps.htaps_c, - pipe.scale_taps.vtaps, - pipe.scale_taps.vtaps_c, - - /* Output */ - &pscl_throughput, &pscl_throughput_chroma, - &dpp_clk_single_dpp); - - clock = dpp_clk_single_dpp * (1 + context->bw_ctx.dml.soc.dcn_downspread_percent / 100); - - if (clock > 0) - clk_frequency = vco_speed * 4.0 / ((int) (vco_speed * 4.0)); - - if (clk_frequency > context->bw_ctx.dml.soc.clock_limits[index].dppclk_mhz) - return true; - else - return false; -} - void dcn32_determine_det_override(struct dc_state *context, display_e2e_pipe_params_st *pipes, bool *is_pipe_split_expected, int pipe_cnt) { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 253ff9659b0d..1b9e34f1232a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -24,7 +24,7 @@ * */ #include "dcn32_fpu.h" - +#include "display_mode_vba_util_32.h" // We need this includes for WATERMARKS_* defines #include "clk_mgr/dcn32/dcn32_smu13_driver_if.h" @@ -154,3 +154,40 @@ void dcn32_helper_populate_phantom_dlg_params(struct dc *dc, } } +bool dcn32_predict_pipe_split(struct dc_state *context, display_pipe_params_st pipe, int index) +{ + double pscl_throughput; + double pscl_throughput_chroma; + double dpp_clk_single_dpp, clock; + double clk_frequency = 0.0; + double vco_speed = context->bw_ctx.dml.soc.dispclk_dppclk_vco_speed_mhz; + + dc_assert_fp_enabled(); + + dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(pipe.scale_ratio_depth.hscl_ratio, + pipe.scale_ratio_depth.hscl_ratio_c, + pipe.scale_ratio_depth.vscl_ratio, + pipe.scale_ratio_depth.vscl_ratio_c, + context->bw_ctx.dml.ip.max_dchub_pscl_bw_pix_per_clk, + context->bw_ctx.dml.ip.max_pscl_lb_bw_pix_per_clk, + pipe.dest.pixel_rate_mhz, + pipe.src.source_format, + pipe.scale_taps.htaps, + pipe.scale_taps.htaps_c, + pipe.scale_taps.vtaps, + pipe.scale_taps.vtaps_c, + /* Output */ + &pscl_throughput, &pscl_throughput_chroma, + &dpp_clk_single_dpp); + + clock = dpp_clk_single_dpp * (1 + context->bw_ctx.dml.soc.dcn_downspread_percent / 100); + + if (clock > 0) + clk_frequency = vco_speed * 4.0 / ((int)(vco_speed * 4.0)); + + if (clk_frequency > context->bw_ctx.dml.soc.clock_limits[index].dppclk_mhz) + return true; + else + return false; +} + diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h index 492f99b6d561..d5f157cdd0b4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h @@ -36,4 +36,8 @@ void dcn32_helper_populate_phantom_dlg_params(struct dc *dc, display_e2e_pipe_params_st *pipes, int pipe_cnt); +bool dcn32_predict_pipe_split(struct dc_state *context, + display_pipe_params_st 
pipe, + int index); + #endif -- cgit v1.2.3 From 34a65beccdea16a12d862e94d004ccf00de8cb3b Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Wed, 20 Jul 2022 20:49:17 +0800 Subject: drm/amd/pm: update driver if header for smu_13_0_7 update driver if header for smu_13_0_7 Signed-off-by: Kenneth Feng Acked-by: Hawking Zhang Signed-off-by: Alex Deucher --- .../pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h | 24 ++++++++++++++-------- drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 2 +- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h index 132da684e379..25c08f963f49 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h @@ -25,10 +25,10 @@ // *** IMPORTANT *** // PMFW TEAM: Always increment the interface version on any change to this file -#define SMU13_DRIVER_IF_VERSION 0x2A +#define SMU13_DRIVER_IF_VERSION 0x2C //Increment this version if SkuTable_t or BoardTable_t change -#define PPTABLE_VERSION 0x1E +#define PPTABLE_VERSION 0x20 #define NUM_GFXCLK_DPM_LEVELS 16 #define NUM_SOCCLK_DPM_LEVELS 8 @@ -152,6 +152,7 @@ typedef enum { #define DEBUG_OVERRIDE_DISABLE_DFLL 0x00000200 #define DEBUG_OVERRIDE_ENABLE_RLC_VF_BRINGUP_MODE 0x00000400 #define DEBUG_OVERRIDE_DFLL_MASTER_MODE 0x00000800 +#define DEBUG_OVERRIDE_ENABLE_PROFILING_MODE 0x00001000 // VR Mapping Bit Defines #define VR_MAPPING_VR_SELECT_MASK 0x01 @@ -1014,8 +1015,8 @@ typedef struct { uint16_t Vmin_Hot_Eol[PMFW_VOLT_PLANE_COUNT]; //In mV(Q2) End-of-life Vset to be used at hot. uint16_t Vmin_Cold_Eol[PMFW_VOLT_PLANE_COUNT]; //In mV(Q2) End-of-life Vset to be used at cold. 
uint16_t Vmin_Aging_Offset[PMFW_VOLT_PLANE_COUNT]; //In mV(Q2) Worst-case aging margin - uint16_t Vmin_Plat_Offset_Hot[PMFW_VOLT_PLANE_COUNT]; //In mV(Q2) Platform offset apply to T0 Hot - uint16_t Vmin_Plat_Offset_Cold[PMFW_VOLT_PLANE_COUNT]; //In mV(Q2) Platform offset apply to T0 Cold + uint16_t Spare_Vmin_Plat_Offset_Hot[PMFW_VOLT_PLANE_COUNT]; //In mV(Q2) Platform offset apply to T0 Hot + uint16_t Spare_Vmin_Plat_Offset_Cold[PMFW_VOLT_PLANE_COUNT]; //In mV(Q2) Platform offset apply to T0 Cold //This is a fixed/minimum VMIN aging degradation offset which is applied at T0. This reflects the minimum amount of aging already accounted for. uint16_t VcBtcFixedVminAgingOffset[PMFW_VOLT_PLANE_COUNT]; @@ -1081,11 +1082,15 @@ typedef struct { uint16_t GfxclkFreqGfxUlv; // in MHz uint8_t GfxIdlePadding2[2]; - - uint32_t GfxoffSpare[16]; + uint32_t GfxOffEntryHysteresis; //For RLC to count after it enters CGCG, and before triggers GFXOFF entry + uint32_t GfxoffSpare[15]; // GFX GPO - uint32_t GfxGpoSpare[16]; + float DfllBtcMasterScalerM; + int32_t DfllBtcMasterScalerB; + float DfllBtcSlaveScalerM; + int32_t DfllBtcSlaveScalerB; + uint32_t GfxGpoSpare[12]; // GFX DCS @@ -1326,8 +1331,11 @@ typedef struct { uint32_t PostVoltageSetBacoDelay; // in microseconds. Amount of time FW will wait after power good is established or PSI0 command is issued uint32_t BacoEntryDelay; // in milliseconds. 
Amount of time FW will wait to trigger BACO entry after receiving entry notification from OS + uint8_t FuseWritePowerMuxPresent; + uint8_t FuseWritePadding[3]; + // SECTION: Board Reserved - uint32_t BoardSpare[64]; + uint32_t BoardSpare[63]; // SECTION: Structure Padding diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index 038a8956de5b..3e5838346f02 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -31,7 +31,7 @@ #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x04 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0 0x2A -#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x2A +#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x2C #define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500 //500ms -- cgit v1.2.3 From de0246ee9fc95f1072a0d26ec4025a3545f159b8 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Thu, 14 Jul 2022 14:45:18 +0800 Subject: drm/amd/pm: enable GPO feature support for SMU13.0.0 The feature is ready with latest firmwares. 
Signed-off-by: Evan Quan Reviewed-by: Hawking Zhang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 2b83191e8006..2b973d76fad1 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -311,6 +311,8 @@ smu_v13_0_0_get_allowed_feature_mask(struct smu_context *smu, *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_VR0HOT_BIT); + *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_GFX_POWER_OPTIMIZER_BIT); + return 0; } -- cgit v1.2.3 From 25dfc8fab47fb0d7231154e07e20b5fc0221f96e Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 15 Jul 2022 16:48:09 +0800 Subject: drm/amd/pm: update SMU13.0.0 driver_if header To fit the latest 78.49.0 PMFW. Also, bump the version to 0x2B. Signed-off-by: Evan Quan Reviewed-by: Hawking Zhang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h | 4 ++-- drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h index 5becfc1bb2ec..2b672d102c96 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h @@ -973,8 +973,8 @@ typedef struct { uint16_t Vmin_Hot_Eol[PMFW_VOLT_PLANE_COUNT]; //In mV(Q2) End-of-life Vset to be used at hot. uint16_t Vmin_Cold_Eol[PMFW_VOLT_PLANE_COUNT]; //In mV(Q2) End-of-life Vset to be used at cold. 
uint16_t Vmin_Aging_Offset[PMFW_VOLT_PLANE_COUNT]; //In mV(Q2) Worst-case aging margin - uint16_t Vmin_Plat_Offset_Hot[PMFW_VOLT_PLANE_COUNT]; //In mV(Q2) Platform offset apply to T0 Hot - uint16_t Vmin_Plat_Offset_Cold[PMFW_VOLT_PLANE_COUNT]; //In mV(Q2) Platform offset apply to T0 Cold + uint16_t Spare_Vmin_Plat_Offset_Hot[PMFW_VOLT_PLANE_COUNT]; //In mV(Q2) Platform offset apply to T0 Hot + uint16_t Spare_Vmin_Plat_Offset_Cold[PMFW_VOLT_PLANE_COUNT]; //In mV(Q2) Platform offset apply to T0 Cold //This is a fixed/minimum VMIN aging degradation offset which is applied at T0. This reflects the minimum amount of aging already accounted for. uint16_t VcBtcFixedVminAgingOffset[PMFW_VOLT_PLANE_COUNT]; diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index 3e5838346f02..72b553618116 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -30,7 +30,7 @@ #define SMU13_DRIVER_IF_VERSION_ALDE 0x08 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x04 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04 -#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0 0x2A +#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0 0x2B #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x2C #define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500 //500ms -- cgit v1.2.3 From 326f0672153e0ff5b19f0ff9d6b5f75a64a9c2b5 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Tue, 12 Jul 2022 14:23:49 +0800 Subject: drm/amd/pm: correct smu 13.0.0/7 mp1 state setup Only PP_MP1_STATE_UNLOAD is supported for now. For other mp1 state, we should just ignore it. Otherwise, there will be errors coming out. 
Signed-off-by: Evan Quan Reviewed-by: Hawking Zhang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 19 ++++++++++++++++++- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 19 ++++++++++++++++++- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 2b973d76fad1..931c775fe27e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -1763,6 +1763,23 @@ static void smu_v13_0_0_i2c_control_fini(struct smu_context *smu) adev->pm.fru_eeprom_i2c_bus = NULL; } +static int smu_v13_0_0_set_mp1_state(struct smu_context *smu, + enum pp_mp1_state mp1_state) +{ + int ret; + + switch (mp1_state) { + case PP_MP1_STATE_UNLOAD: + ret = smu_cmn_set_mp1_state(smu, mp1_state); + break; + default: + /* Ignore others */ + ret = 0; + } + + return ret; +} + static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .get_allowed_feature_mask = smu_v13_0_0_get_allowed_feature_mask, .set_default_dpm_table = smu_v13_0_0_set_default_dpm_table, @@ -1829,7 +1846,7 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .baco_exit = smu_v13_0_baco_exit, .mode1_reset_is_support = smu_v13_0_0_is_mode1_reset_supported, .mode1_reset = smu_v13_0_mode1_reset, - .set_mp1_state = smu_cmn_set_mp1_state, + .set_mp1_state = smu_v13_0_0_set_mp1_state, }; void smu_v13_0_0_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 16eea2de8a2d..9dd56e73218b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -1550,6 +1550,23 @@ static int smu_v13_0_7_set_power_profile_mode(struct smu_context *smu, long *inp return ret; } +static int smu_v13_0_7_set_mp1_state(struct 
smu_context *smu, + enum pp_mp1_state mp1_state) +{ + int ret; + + switch (mp1_state) { + case PP_MP1_STATE_UNLOAD: + ret = smu_cmn_set_mp1_state(smu, mp1_state); + break; + default: + /* Ignore others */ + ret = 0; + } + + return ret; +} + static const struct pptable_funcs smu_v13_0_7_ppt_funcs = { .get_allowed_feature_mask = smu_v13_0_7_get_allowed_feature_mask, .set_default_dpm_table = smu_v13_0_7_set_default_dpm_table, @@ -1607,7 +1624,7 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = { .baco_set_state = smu_v13_0_baco_set_state, .baco_enter = smu_v13_0_baco_enter, .baco_exit = smu_v13_0_baco_exit, - .set_mp1_state = smu_cmn_set_mp1_state, + .set_mp1_state = smu_v13_0_7_set_mp1_state, }; void smu_v13_0_7_set_ppt_funcs(struct smu_context *smu) -- cgit v1.2.3 From 42c7de9622b2ec64f7c15c336b75f8933ea7545c Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 18 Jul 2022 10:45:35 +0800 Subject: drm/amd/pm: revise the driver reloading fix for SMU 13.0.0 and 13.0.7 The current approach breaks S3/S4, because an asic reset is needed for them and putting the SMU out of service (via SMU_MSG_PrepareMp1ForUnload) makes that asic reset fail. With the current design, an asic reset is also involved on driver reloading, and that already brings the asic back to a clean state. So the SMU_MSG_PrepareMp1ForUnload operation is not really necessary. Thus we will just drop the SMU_MSG_PrepareMp1ForUnload operation. We may revise the whole driver reloading sequence when there is a better design.
Fixes: 72aeb6ee0c78 ("drm/amd/pm: fix driver reload SMC firmware fail issue for smu13") Signed-off-by: Evan Quan Reviewed-by: Hawking Zhang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index fd79b213fab4..6d9b3c6af164 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1415,13 +1415,6 @@ static int smu_disable_dpms(struct smu_context *smu) switch (adev->ip_versions[MP1_HWIP][0]) { case IP_VERSION(13, 0, 0): case IP_VERSION(13, 0, 7): - if (!(adev->in_runpm || amdgpu_in_reset(adev))) { - ret = smu_set_mp1_state(smu, PP_MP1_STATE_UNLOAD); - if (ret) { - dev_err(adev->dev, "Fail set mp1 state to UNLOAD!\n"); - return ret; - } - } return 0; default: break; -- cgit v1.2.3 From 2207efdd8388bd300a0051b1775705d890abd306 Mon Sep 17 00:00:00 2001 From: Chengming Gui Date: Fri, 15 Jul 2022 13:12:14 +0800 Subject: drm/amd/amdgpu: add TAP_DELAYS upload support for gfx10 Support {GLOBAL/SE0/SE1/SE2/SE3}_TAP_DELAYS uploading. v2: upload TAP_DELAYS before RLC autoload was triggered. 
(Hawking) Signed-off-by: Chengming Gui Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 15 +++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h | 10 ++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 30 +++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h | 20 ++++++++++++ drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 53 ++++++++++++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h | 2 ++ 6 files changed, 129 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 6540582ecbf8..3ee363bfbac2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -2168,6 +2168,21 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode, case AMDGPU_UCODE_ID_RLC_DRAM: *type = GFX_FW_TYPE_RLC_DRAM_BOOT; break; + case AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS: + *type = GFX_FW_TYPE_GLOBAL_TAP_DELAYS; + break; + case AMDGPU_UCODE_ID_SE0_TAP_DELAYS: + *type = GFX_FW_TYPE_SE0_TAP_DELAYS; + break; + case AMDGPU_UCODE_ID_SE1_TAP_DELAYS: + *type = GFX_FW_TYPE_SE1_TAP_DELAYS; + break; + case AMDGPU_UCODE_ID_SE2_TAP_DELAYS: + *type = GFX_FW_TYPE_SE2_TAP_DELAYS; + break; + case AMDGPU_UCODE_ID_SE3_TAP_DELAYS: + *type = GFX_FW_TYPE_SE3_TAP_DELAYS; + break; case AMDGPU_UCODE_ID_SMC: *type = GFX_FW_TYPE_SMU; break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h index f6fd9e1a7dac..03ac36b2c2cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h @@ -222,6 +222,11 @@ struct amdgpu_rlc { u32 rlc_dram_ucode_size_bytes; u32 rlcp_ucode_size_bytes; u32 rlcv_ucode_size_bytes; + u32 global_tap_delays_ucode_size_bytes; + u32 se0_tap_delays_ucode_size_bytes; + u32 se1_tap_delays_ucode_size_bytes; + u32 se2_tap_delays_ucode_size_bytes; + u32 se3_tap_delays_ucode_size_bytes; u32 *register_list_format; u32 *register_restore; @@ -232,6 
+237,11 @@ struct amdgpu_rlc { u8 *rlc_dram_ucode; u8 *rlcp_ucode; u8 *rlcv_ucode; + u8 *global_tap_delays_ucode; + u8 *se0_tap_delays_ucode; + u8 *se1_tap_delays_ucode; + u8 *se2_tap_delays_ucode; + u8 *se3_tap_delays_ucode; bool is_rlc_v2_1; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index c312577df596..939c8614f0e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -561,6 +561,16 @@ const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id) return "RLC_P"; case AMDGPU_UCODE_ID_RLC_V: return "RLC_V"; + case AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS: + return "GLOBAL_TAP_DELAYS"; + case AMDGPU_UCODE_ID_SE0_TAP_DELAYS: + return "SE0_TAP_DELAYS"; + case AMDGPU_UCODE_ID_SE1_TAP_DELAYS: + return "SE1_TAP_DELAYS"; + case AMDGPU_UCODE_ID_SE2_TAP_DELAYS: + return "SE2_TAP_DELAYS"; + case AMDGPU_UCODE_ID_SE3_TAP_DELAYS: + return "SE3_TAP_DELAYS"; case AMDGPU_UCODE_ID_IMU_I: return "IMU_I"; case AMDGPU_UCODE_ID_IMU_D: @@ -745,6 +755,26 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, ucode->ucode_size = adev->gfx.rlc.rlcv_ucode_size_bytes; ucode_addr = adev->gfx.rlc.rlcv_ucode; break; + case AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS: + ucode->ucode_size = adev->gfx.rlc.global_tap_delays_ucode_size_bytes; + ucode_addr = adev->gfx.rlc.global_tap_delays_ucode; + break; + case AMDGPU_UCODE_ID_SE0_TAP_DELAYS: + ucode->ucode_size = adev->gfx.rlc.se0_tap_delays_ucode_size_bytes; + ucode_addr = adev->gfx.rlc.se0_tap_delays_ucode; + break; + case AMDGPU_UCODE_ID_SE1_TAP_DELAYS: + ucode->ucode_size = adev->gfx.rlc.se1_tap_delays_ucode_size_bytes; + ucode_addr = adev->gfx.rlc.se1_tap_delays_ucode; + break; + case AMDGPU_UCODE_ID_SE2_TAP_DELAYS: + ucode->ucode_size = adev->gfx.rlc.se2_tap_delays_ucode_size_bytes; + ucode_addr = adev->gfx.rlc.se2_tap_delays_ucode; + break; + case AMDGPU_UCODE_ID_SE3_TAP_DELAYS: + ucode->ucode_size = 
adev->gfx.rlc.se3_tap_delays_ucode_size_bytes; + ucode_addr = adev->gfx.rlc.se3_tap_delays_ucode; + break; case AMDGPU_UCODE_ID_CP_MES: ucode->ucode_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes); ucode_addr = (u8 *)ucode->fw->data + diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index f510b6aa82ab..ebed3f5226db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -266,6 +266,21 @@ struct rlc_firmware_header_v2_3 { uint32_t rlcv_ucode_offset_bytes; }; +/* version_major=2, version_minor=4 */ +struct rlc_firmware_header_v2_4 { + struct rlc_firmware_header_v2_3 v2_3; + uint32_t global_tap_delays_ucode_size_bytes; + uint32_t global_tap_delays_ucode_offset_bytes; + uint32_t se0_tap_delays_ucode_size_bytes; + uint32_t se0_tap_delays_ucode_offset_bytes; + uint32_t se1_tap_delays_ucode_size_bytes; + uint32_t se1_tap_delays_ucode_offset_bytes; + uint32_t se2_tap_delays_ucode_size_bytes; + uint32_t se2_tap_delays_ucode_offset_bytes; + uint32_t se3_tap_delays_ucode_size_bytes; + uint32_t se3_tap_delays_ucode_offset_bytes; +}; + /* version_major=1, version_minor=0 */ struct sdma_firmware_header_v1_0 { struct common_firmware_header header; @@ -426,6 +441,11 @@ enum AMDGPU_UCODE_ID { AMDGPU_UCODE_ID_CP_MES1_DATA, AMDGPU_UCODE_ID_IMU_I, AMDGPU_UCODE_ID_IMU_D, + AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS, + AMDGPU_UCODE_ID_SE0_TAP_DELAYS, + AMDGPU_UCODE_ID_SE1_TAP_DELAYS, + AMDGPU_UCODE_ID_SE2_TAP_DELAYS, + AMDGPU_UCODE_ID_SE3_TAP_DELAYS, AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL, AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM, AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 5820c3f0e215..fafbad3cf08d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3976,6 +3976,23 @@ static void gfx_v10_0_init_rlc_iram_dram_microcode(struct amdgpu_device *adev) 
adev->gfx.rlc.rlc_dram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_dram_ucode_offset_bytes); } +static void gfx_v10_0_init_tap_delays_microcode(struct amdgpu_device *adev) +{ + const struct rlc_firmware_header_v2_4 *rlc_hdr; + + rlc_hdr = (const struct rlc_firmware_header_v2_4 *)adev->gfx.rlc_fw->data; + adev->gfx.rlc.global_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->global_tap_delays_ucode_size_bytes); + adev->gfx.rlc.global_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->global_tap_delays_ucode_offset_bytes); + adev->gfx.rlc.se0_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se0_tap_delays_ucode_size_bytes); + adev->gfx.rlc.se0_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se0_tap_delays_ucode_offset_bytes); + adev->gfx.rlc.se1_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se1_tap_delays_ucode_size_bytes); + adev->gfx.rlc.se1_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se1_tap_delays_ucode_offset_bytes); + adev->gfx.rlc.se2_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se2_tap_delays_ucode_size_bytes); + adev->gfx.rlc.se2_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se2_tap_delays_ucode_offset_bytes); + adev->gfx.rlc.se3_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se3_tap_delays_ucode_size_bytes); + adev->gfx.rlc.se3_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se3_tap_delays_ucode_offset_bytes); +} + static bool gfx_v10_0_navi10_gfxoff_should_enable(struct amdgpu_device *adev) { bool ret = false; @@ -4153,8 +4170,11 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) if (version_major == 2) { if (version_minor >= 1) gfx_v10_0_init_rlc_ext_microcode(adev); - if (version_minor == 2) + if (version_minor >= 2) gfx_v10_0_init_rlc_iram_dram_microcode(adev); + if (version_minor == 4) { + gfx_v10_0_init_tap_delays_microcode(adev); + } } } @@ -4251,8 +4271,39 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) adev->firmware.fw_size += 
ALIGN(adev->gfx.rlc.rlc_dram_ucode_size_bytes, PAGE_SIZE); } + } + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.global_tap_delays_ucode_size_bytes, PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE0_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_SE0_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.se0_tap_delays_ucode_size_bytes, PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE1_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_SE1_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.se1_tap_delays_ucode_size_bytes, PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE2_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_SE2_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.se2_tap_delays_ucode_size_bytes, PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE3_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_SE3_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.se3_tap_delays_ucode_size_bytes, PAGE_SIZE); + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; info->fw = adev->gfx.mec_fw; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index 236b7a61443a..22c775f39119 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -259,6 +259,8 @@ enum psp_gfx_fw_type { GFX_FW_TYPE_SDMA7 = 57, /* SDMA7 MI */ GFX_FW_TYPE_VCN1 = 58, /* VCN1 MI */ GFX_FW_TYPE_CAP = 62, /* CAP_FW */ + GFX_FW_TYPE_SE2_TAP_DELAYS = 65, /* SE2 TAP DELAYS NV */ + GFX_FW_TYPE_SE3_TAP_DELAYS = 66, /* SE3 TAP DELAYS NV */ GFX_FW_TYPE_REG_LIST = 67, /* REG_LIST MI */ GFX_FW_TYPE_IMU_I = 68, /* IMU 
Instruction FW SOC21 */ GFX_FW_TYPE_IMU_D = 69, /* IMU Data FW SOC21 */ -- cgit v1.2.3 From 44998fbdcdc39253cb10293c3c27dabc040ad7e8 Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Tue, 5 Apr 2022 10:42:47 -0400 Subject: drm/amdgpu: Refactor code to handle non coherent and uncached This simplifies existing coherence handling for Arcturus and Aldebaran to account for !coherent && uncached scenarios. Reviewed-by: Felix Kuehling Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 51 +++++++++++------------- 1 file changed, 24 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index d2d2b16c4d50..08997092e7f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -410,45 +410,42 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem) switch (adev->asic_type) { case CHIP_ARCTURUS: - if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { - if (bo_adev == adev) - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; - else - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; - } else { - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; - } - break; case CHIP_ALDEBARAN: - if (coherent && uncached) { - if (adev->gmc.xgmi.connected_to_cpu || - !(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)) - snoop = true; - mapping_flags |= AMDGPU_VM_MTYPE_UC; - } else if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { + if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { if (bo_adev == adev) { - mapping_flags |= coherent ?
- AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; - if (adev->gmc.xgmi.connected_to_cpu) + if (uncached) + mapping_flags |= AMDGPU_VM_MTYPE_UC; + else if (coherent) + mapping_flags |= AMDGPU_VM_MTYPE_CC; + else + mapping_flags |= AMDGPU_VM_MTYPE_RW; + if (adev->asic_type == CHIP_ALDEBARAN && + adev->gmc.xgmi.connected_to_cpu) snoop = true; } else { - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; + if (uncached || coherent) + mapping_flags |= AMDGPU_VM_MTYPE_UC; + else + mapping_flags |= AMDGPU_VM_MTYPE_NC; if (amdgpu_xgmi_same_hive(adev, bo_adev)) snoop = true; } } else { + if (uncached || coherent) + mapping_flags |= AMDGPU_VM_MTYPE_UC; + else + mapping_flags |= AMDGPU_VM_MTYPE_NC; snoop = true; - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; } break; default: - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; + if (uncached || coherent) + mapping_flags |= AMDGPU_VM_MTYPE_UC; + else + mapping_flags |= AMDGPU_VM_MTYPE_NC; + + if (!(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)) + snoop = true; } pte_flags = amdgpu_gem_va_map_flags(adev, mapping_flags); -- cgit v1.2.3 From 34a1b0f90e33b00b431d6e8636b787111c6c29d5 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Fri, 8 Jul 2022 09:30:43 -0400 Subject: drm/amd/display: Move insert entry table to the FPU code The insert_entry_into_table_sorted function uses FPU operations and calls other static support functions. This commit moves the insert entry function, with all the required structs and static functions, to the FPU file.
Tested-by: Daniel Wheeler Reviewed-by: Harry Wentland Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 188 ++------------------- .../gpu/drm/amd/display/dc/dcn32/dcn32_resource.h | 3 + .../gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 186 ++++++++++++++++++++ .../gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h | 9 + 4 files changed, 208 insertions(+), 178 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index c5ba411d12ba..d52192ab9421 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -115,137 +115,6 @@ static const struct IP_BASE DCN_BASE = { { { { 0x00000012, 0x000000C0, 0x000034C #define DC_LOGGER_INIT(logger) -#define DCN3_2_DEFAULT_DET_SIZE 256 -#define DCN3_2_MAX_DET_SIZE 1152 -#define DCN3_2_MIN_DET_SIZE 128 -#define DCN3_2_MIN_COMPBUF_SIZE_KB 128 - -struct _vcs_dpi_ip_params_st dcn3_2_ip = { - .gpuvm_enable = 0, - .gpuvm_max_page_table_levels = 4, - .hostvm_enable = 0, - .rob_buffer_size_kbytes = 128, - .det_buffer_size_kbytes = DCN3_2_DEFAULT_DET_SIZE, - .config_return_buffer_size_in_kbytes = 1280, - .compressed_buffer_segment_size_in_kbytes = 64, - .meta_fifo_size_in_kentries = 22, - .zero_size_buffer_entries = 512, - .compbuf_reserved_space_64b = 256, - .compbuf_reserved_space_zs = 64, - .dpp_output_buffer_pixels = 2560, - .opp_output_buffer_lines = 1, - .pixel_chunk_size_kbytes = 8, - .alpha_pixel_chunk_size_kbytes = 4, // not appearing in spreadsheet, match c code from hw team - .min_pixel_chunk_size_bytes = 1024, - .dcc_meta_buffer_size_bytes = 6272, - .meta_chunk_size_kbytes = 2, - .min_meta_chunk_size_bytes = 256, - .writeback_chunk_size_kbytes = 8, - .ptoi_supported = false, - .num_dsc = 4, - .maximum_dsc_bits_per_component = 12, - .maximum_pixels_per_line_per_dsc_unit = 6016, - .dsc422_native_support = true, - 
.is_line_buffer_bpp_fixed = true, - .line_buffer_fixed_bpp = 57, - .line_buffer_size_bits = 1171920, //DPP doc, DCN3_2_DisplayMode_73.xlsm still shows as 986880 bits with 48 bpp - .max_line_buffer_lines = 32, - .writeback_interface_buffer_size_kbytes = 90, - .max_num_dpp = 4, - .max_num_otg = 4, - .max_num_hdmi_frl_outputs = 1, - .max_num_wb = 1, - .max_dchub_pscl_bw_pix_per_clk = 4, - .max_pscl_lb_bw_pix_per_clk = 2, - .max_lb_vscl_bw_pix_per_clk = 4, - .max_vscl_hscl_bw_pix_per_clk = 4, - .max_hscl_ratio = 6, - .max_vscl_ratio = 6, - .max_hscl_taps = 8, - .max_vscl_taps = 8, - .dpte_buffer_size_in_pte_reqs_luma = 64, - .dpte_buffer_size_in_pte_reqs_chroma = 34, - .dispclk_ramp_margin_percent = 1, - .max_inter_dcn_tile_repeaters = 8, - .cursor_buffer_size = 16, - .cursor_chunk_size = 2, - .writeback_line_buffer_buffer_size = 0, - .writeback_min_hscl_ratio = 1, - .writeback_min_vscl_ratio = 1, - .writeback_max_hscl_ratio = 1, - .writeback_max_vscl_ratio = 1, - .writeback_max_hscl_taps = 1, - .writeback_max_vscl_taps = 1, - .dppclk_delay_subtotal = 47, - .dppclk_delay_scl = 50, - .dppclk_delay_scl_lb_only = 16, - .dppclk_delay_cnvc_formatter = 28, - .dppclk_delay_cnvc_cursor = 6, - .dispclk_delay_subtotal = 125, - .dynamic_metadata_vm_enabled = false, - .odm_combine_4to1_supported = false, - .dcc_supported = true, - .max_num_dp2p0_outputs = 2, - .max_num_dp2p0_streams = 4, -}; - -struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc = { - .clock_limits = { - { - .state = 0, - .dcfclk_mhz = 1564.0, - .fabricclk_mhz = 400.0, - .dispclk_mhz = 2150.0, - .dppclk_mhz = 2150.0, - .phyclk_mhz = 810.0, - .phyclk_d18_mhz = 667.0, - .phyclk_d32_mhz = 625.0, - .socclk_mhz = 1200.0, - .dscclk_mhz = 716.667, - .dram_speed_mts = 1600.0, - .dtbclk_mhz = 1564.0, - }, - }, - .num_states = 1, - .sr_exit_time_us = 5.20, - .sr_enter_plus_exit_time_us = 9.60, - .sr_exit_z8_time_us = 285.0, - .sr_enter_plus_exit_z8_time_us = 320, - .writeback_latency_us = 12.0, - 
.round_trip_ping_latency_dcfclk_cycles = 263, - .urgent_latency_pixel_data_only_us = 4.0, - .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, - .urgent_latency_vm_data_only_us = 4.0, - .fclk_change_latency_us = 20, - .usr_retraining_latency_us = 2, - .smn_latency_us = 2, - .mall_allocated_for_dcn_mbytes = 64, - .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, - .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, - .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, - .pct_ideal_sdp_bw_after_urgent = 100.0, - .pct_ideal_fabric_bw_after_urgent = 67.0, - .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 20.0, - .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, // N/A, for now keep as is until DML implemented - .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, // N/A, for now keep as is until DML implemented - .pct_ideal_dram_bw_after_urgent_strobe = 67.0, - .max_avg_sdp_bw_use_normal_percent = 80.0, - .max_avg_fabric_bw_use_normal_percent = 60.0, - .max_avg_dram_bw_use_normal_strobe_percent = 50.0, - .max_avg_dram_bw_use_normal_percent = 15.0, - .num_chans = 8, - .dram_channel_width_bytes = 2, - .fabric_datapath_to_dcn_data_return_bytes = 64, - .return_bus_width_bytes = 64, - .downspread_percent = 0.38, - .dcn_downspread_percent = 0.5, - .dram_clock_change_latency_us = 400, - .dispclk_dppclk_vco_speed_mhz = 4300.0, - .do_urgent_latency_adjustment = true, - .urgent_latency_adjustment_fabric_clock_component_us = 1.0, - .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000, -}; - enum dcn32_clk_src_array_id { DCN32_CLK_SRC_PLL0, DCN32_CLK_SRC_PLL1, @@ -3454,53 +3323,6 @@ static void get_optimal_ntuple(struct _vcs_dpi_voltage_scaling_st *entry) } } -static float calculate_net_bw_in_kbytes_sec(struct _vcs_dpi_voltage_scaling_st *entry) -{ - float memory_bw_kbytes_sec = entry->dram_speed_mts * dcn3_2_soc.num_chans * - dcn3_2_soc.dram_channel_width_bytes * 
((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100); - - float fabric_bw_kbytes_sec = entry->fabricclk_mhz * dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100); - - float sdp_bw_kbytes_sec = entry->dcfclk_mhz * dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100); - - float limiting_bw_kbytes_sec = memory_bw_kbytes_sec; - - if (fabric_bw_kbytes_sec < limiting_bw_kbytes_sec) - limiting_bw_kbytes_sec = fabric_bw_kbytes_sec; - - if (sdp_bw_kbytes_sec < limiting_bw_kbytes_sec) - limiting_bw_kbytes_sec = sdp_bw_kbytes_sec; - - return limiting_bw_kbytes_sec; -} - -static void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries, - struct _vcs_dpi_voltage_scaling_st *entry) -{ - int index = 0; - int i = 0; - float net_bw_of_new_state = 0; - - if (*num_entries == 0) { - table[0] = *entry; - (*num_entries)++; - } else { - net_bw_of_new_state = calculate_net_bw_in_kbytes_sec(entry); - while (net_bw_of_new_state > calculate_net_bw_in_kbytes_sec(&table[index])) { - index++; - if (index >= *num_entries) - break; - } - - for (i = *num_entries; i > index; i--) { - table[i] = table[i - 1]; - } - - table[index] = *entry; - (*num_entries)++; - } -} - static void remove_entry_from_table_at_index(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries, unsigned int index) { @@ -3585,7 +3407,9 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, entry.dram_speed_mts = 0; get_optimal_ntuple(&entry); + DC_FP_START(); insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_END(); } // Insert the max DCFCLK @@ -3594,7 +3418,9 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, entry.dram_speed_mts = 0; get_optimal_ntuple(&entry); + DC_FP_START(); insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_END(); // Insert the UCLK DPMS for (i = 0; i < num_uclk_dpms; 
i++) { @@ -3603,7 +3429,9 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, entry.dram_speed_mts = bw_params->clk_table.entries[i].memclk_mhz * 16; get_optimal_ntuple(&entry); + DC_FP_START(); insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_END(); } // If FCLK is coarse grained, insert individual DPMs. @@ -3614,7 +3442,9 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, entry.dram_speed_mts = 0; get_optimal_ntuple(&entry); + DC_FP_START(); insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_END(); } } // If FCLK fine grained, only insert max @@ -3624,7 +3454,9 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, entry.dram_speed_mts = 0; get_optimal_ntuple(&entry); + DC_FP_START(); insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_END(); } // At this point, the table contains all "points of interest" based on diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h index 901aa7e13bd2..7ccad84b1f16 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h @@ -33,6 +33,9 @@ #define TO_DCN32_RES_POOL(pool)\ container_of(pool, struct dcn32_resource_pool, base) +extern struct _vcs_dpi_ip_params_st dcn3_2_ip; +extern struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc; + struct dcn32_resource_pool { struct resource_pool base; }; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 1b9e34f1232a..4223a9a9dd45 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -28,6 +28,132 @@ // We need this includes for WATERMARKS_* defines #include "clk_mgr/dcn32/dcn32_smu13_driver_if.h" +struct _vcs_dpi_ip_params_st dcn3_2_ip = { + .gpuvm_enable = 0, + .gpuvm_max_page_table_levels = 4, + 
.hostvm_enable = 0, + .rob_buffer_size_kbytes = 128, + .det_buffer_size_kbytes = DCN3_2_DEFAULT_DET_SIZE, + .config_return_buffer_size_in_kbytes = 1280, + .compressed_buffer_segment_size_in_kbytes = 64, + .meta_fifo_size_in_kentries = 22, + .zero_size_buffer_entries = 512, + .compbuf_reserved_space_64b = 256, + .compbuf_reserved_space_zs = 64, + .dpp_output_buffer_pixels = 2560, + .opp_output_buffer_lines = 1, + .pixel_chunk_size_kbytes = 8, + .alpha_pixel_chunk_size_kbytes = 4, + .min_pixel_chunk_size_bytes = 1024, + .dcc_meta_buffer_size_bytes = 6272, + .meta_chunk_size_kbytes = 2, + .min_meta_chunk_size_bytes = 256, + .writeback_chunk_size_kbytes = 8, + .ptoi_supported = false, + .num_dsc = 4, + .maximum_dsc_bits_per_component = 12, + .maximum_pixels_per_line_per_dsc_unit = 6016, + .dsc422_native_support = true, + .is_line_buffer_bpp_fixed = true, + .line_buffer_fixed_bpp = 57, + .line_buffer_size_bits = 1171920, + .max_line_buffer_lines = 32, + .writeback_interface_buffer_size_kbytes = 90, + .max_num_dpp = 4, + .max_num_otg = 4, + .max_num_hdmi_frl_outputs = 1, + .max_num_wb = 1, + .max_dchub_pscl_bw_pix_per_clk = 4, + .max_pscl_lb_bw_pix_per_clk = 2, + .max_lb_vscl_bw_pix_per_clk = 4, + .max_vscl_hscl_bw_pix_per_clk = 4, + .max_hscl_ratio = 6, + .max_vscl_ratio = 6, + .max_hscl_taps = 8, + .max_vscl_taps = 8, + .dpte_buffer_size_in_pte_reqs_luma = 64, + .dpte_buffer_size_in_pte_reqs_chroma = 34, + .dispclk_ramp_margin_percent = 1, + .max_inter_dcn_tile_repeaters = 8, + .cursor_buffer_size = 16, + .cursor_chunk_size = 2, + .writeback_line_buffer_buffer_size = 0, + .writeback_min_hscl_ratio = 1, + .writeback_min_vscl_ratio = 1, + .writeback_max_hscl_ratio = 1, + .writeback_max_vscl_ratio = 1, + .writeback_max_hscl_taps = 1, + .writeback_max_vscl_taps = 1, + .dppclk_delay_subtotal = 47, + .dppclk_delay_scl = 50, + .dppclk_delay_scl_lb_only = 16, + .dppclk_delay_cnvc_formatter = 28, + .dppclk_delay_cnvc_cursor = 6, + .dispclk_delay_subtotal = 125, + 
.dynamic_metadata_vm_enabled = false, + .odm_combine_4to1_supported = false, + .dcc_supported = true, + .max_num_dp2p0_outputs = 2, + .max_num_dp2p0_streams = 4, +}; + +struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc = { + .clock_limits = { + { + .state = 0, + .dcfclk_mhz = 1564.0, + .fabricclk_mhz = 400.0, + .dispclk_mhz = 2150.0, + .dppclk_mhz = 2150.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .phyclk_d32_mhz = 625.0, + .socclk_mhz = 1200.0, + .dscclk_mhz = 716.667, + .dram_speed_mts = 16000.0, + .dtbclk_mhz = 1564.0, + }, + }, + .num_states = 1, + .sr_exit_time_us = 5.20, + .sr_enter_plus_exit_time_us = 9.60, + .sr_exit_z8_time_us = 285.0, + .sr_enter_plus_exit_z8_time_us = 320, + .writeback_latency_us = 12.0, + .round_trip_ping_latency_dcfclk_cycles = 263, + .urgent_latency_pixel_data_only_us = 4.0, + .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, + .urgent_latency_vm_data_only_us = 4.0, + .fclk_change_latency_us = 20, + .usr_retraining_latency_us = 2, + .smn_latency_us = 2, + .mall_allocated_for_dcn_mbytes = 64, + .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, + .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, + .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, + .pct_ideal_sdp_bw_after_urgent = 100.0, + .pct_ideal_fabric_bw_after_urgent = 67.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 20.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, // N/A, for now keep as is until DML implemented + .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, // N/A, for now keep as is until DML implemented + .pct_ideal_dram_bw_after_urgent_strobe = 67.0, + .max_avg_sdp_bw_use_normal_percent = 80.0, + .max_avg_fabric_bw_use_normal_percent = 60.0, + .max_avg_dram_bw_use_normal_strobe_percent = 50.0, + .max_avg_dram_bw_use_normal_percent = 15.0, + .num_chans = 8, + .dram_channel_width_bytes = 2, + .fabric_datapath_to_dcn_data_return_bytes = 64, + .return_bus_width_bytes = 64, + .downspread_percent = 
0.38, + .dcn_downspread_percent = 0.5, + .dram_clock_change_latency_us = 400, + .dispclk_dppclk_vco_speed_mhz = 4300.0, + .do_urgent_latency_adjustment = true, + .urgent_latency_adjustment_fabric_clock_component_us = 1.0, + .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000, +}; + void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr) { /* defaults */ @@ -191,3 +317,63 @@ bool dcn32_predict_pipe_split(struct dc_state *context, display_pipe_params_st p return false; } +static float calculate_net_bw_in_kbytes_sec(struct _vcs_dpi_voltage_scaling_st *entry) +{ + float memory_bw_kbytes_sec; + float fabric_bw_kbytes_sec; + float sdp_bw_kbytes_sec; + float limiting_bw_kbytes_sec; + + memory_bw_kbytes_sec = entry->dram_speed_mts * + dcn3_2_soc.num_chans * + dcn3_2_soc.dram_channel_width_bytes * + ((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100); + + fabric_bw_kbytes_sec = entry->fabricclk_mhz * + dcn3_2_soc.return_bus_width_bytes * + ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100); + + sdp_bw_kbytes_sec = entry->dcfclk_mhz * + dcn3_2_soc.return_bus_width_bytes * + ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100); + + limiting_bw_kbytes_sec = memory_bw_kbytes_sec; + + if (fabric_bw_kbytes_sec < limiting_bw_kbytes_sec) + limiting_bw_kbytes_sec = fabric_bw_kbytes_sec; + + if (sdp_bw_kbytes_sec < limiting_bw_kbytes_sec) + limiting_bw_kbytes_sec = sdp_bw_kbytes_sec; + + return limiting_bw_kbytes_sec; +} + +void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table, + unsigned int *num_entries, + struct _vcs_dpi_voltage_scaling_st *entry) +{ + int i = 0; + int index = 0; + float net_bw_of_new_state = 0; + + dc_assert_fp_enabled(); + + if (*num_entries == 0) { + table[0] = *entry; + (*num_entries)++; + } else { + net_bw_of_new_state = calculate_net_bw_in_kbytes_sec(entry); + while (net_bw_of_new_state > calculate_net_bw_in_kbytes_sec(&table[index])) { + index++; + if (index >= *num_entries) + 
break; + } + + for (i = *num_entries; i > index; i--) + table[i] = table[i - 1]; + + table[index] = *entry; + (*num_entries)++; + } +} + diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h index d5f157cdd0b4..62cb0c1d462c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h @@ -29,6 +29,11 @@ #include "clk_mgr_internal.h" +#define DCN3_2_DEFAULT_DET_SIZE 256 +#define DCN3_2_MAX_DET_SIZE 1152 +#define DCN3_2_MIN_DET_SIZE 128 +#define DCN3_2_MIN_COMPBUF_SIZE_KB 128 + void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr); void dcn32_helper_populate_phantom_dlg_params(struct dc *dc, @@ -40,4 +45,8 @@ bool dcn32_predict_pipe_split(struct dc_state *context, display_pipe_params_st pipe, int index); +void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table, + unsigned int *num_entries, + struct _vcs_dpi_voltage_scaling_st *entry); + #endif -- cgit v1.2.3 From 8f5bb69dd453666ff949079443ebce24381e163c Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Fri, 8 Jul 2022 10:28:23 -0400 Subject: drm/amd/display: Move phantom stream to FPU code This commit moves the phantom stream FPU code to the dcn32_fpu file.
Tested-by: Daniel Wheeler Reviewed-by: Harry Wentland Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 89 +--------------------- .../gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 84 ++++++++++++++++++++ .../gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h | 8 ++ 3 files changed, 94 insertions(+), 87 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index d52192ab9421..aee66c9ffcb0 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -1830,93 +1830,6 @@ static void dcn32_enable_phantom_plane(struct dc *dc, } } -/** - * *************************************************************************************** - * dcn32_set_phantom_stream_timing: Set timing params for the phantom stream - * - * Set timing params of the phantom stream based on calculated output from DML. - * This function first gets the DML pipe index using the DC pipe index, then - * calls into DML (get_subviewport_lines_needed_in_mall) to get the number of - * lines required for SubVP MCLK switching and assigns to the phantom stream - * accordingly. - * - * - The number of SubVP lines calculated in DML does not take into account - * FW processing delays and required pstate allow width, so we must include - * that separately. - * - * - Set phantom backporch = vstartup of main pipe - * - * @param [in] dc: current dc state - * @param [in] context: new dc state - * @param [in] ref_pipe: Main pipe for the phantom stream - * @param [in] pipes: DML pipe params - * @param [in] pipe_cnt: number of DML pipes - * @param [in] dc_pipe_idx: DC pipe index for the main pipe (i.e. 
ref_pipe) - * - * @return: void - * - * *************************************************************************************** - */ -static void dcn32_set_phantom_stream_timing(struct dc *dc, - struct dc_state *context, - struct pipe_ctx *ref_pipe, - struct dc_stream_state *phantom_stream, - display_e2e_pipe_params_st *pipes, - unsigned int pipe_cnt, - unsigned int dc_pipe_idx) -{ - unsigned int i, pipe_idx; - struct pipe_ctx *pipe; - uint32_t phantom_vactive, phantom_bp, pstate_width_fw_delay_lines; - unsigned int vlevel = context->bw_ctx.dml.vba.VoltageLevel; - unsigned int dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; - unsigned int socclk = context->bw_ctx.dml.vba.SOCCLKPerState[vlevel]; - - // Find DML pipe index (pipe_idx) using dc_pipe_idx - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - pipe = &context->res_ctx.pipe_ctx[i]; - - if (!pipe->stream) - continue; - - if (i == dc_pipe_idx) - break; - - pipe_idx++; - } - - // Calculate lines required for pstate allow width and FW processing delays - pstate_width_fw_delay_lines = ((double)(dc->caps.subvp_fw_processing_delay_us + - dc->caps.subvp_pstate_allow_width_us) / 1000000) * - (ref_pipe->stream->timing.pix_clk_100hz * 100) / - (double)ref_pipe->stream->timing.h_total; - - // Update clks_cfg for calling into recalculate - pipes[0].clks_cfg.voltage = vlevel; - pipes[0].clks_cfg.dcfclk_mhz = dcfclk; - pipes[0].clks_cfg.socclk_mhz = socclk; - - // DML calculation for MALL region doesn't take into account FW delay - // and required pstate allow width for multi-display cases - phantom_vactive = get_subviewport_lines_needed_in_mall(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx) + - pstate_width_fw_delay_lines; - - // For backporch of phantom pipe, use vstartup of the main pipe - phantom_bp = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); - - phantom_stream->dst.y = 0; - phantom_stream->dst.height = phantom_vactive; - 
phantom_stream->src.y = 0; - phantom_stream->src.height = phantom_vactive; - - phantom_stream->timing.v_addressable = phantom_vactive; - phantom_stream->timing.v_front_porch = 1; - phantom_stream->timing.v_total = phantom_stream->timing.v_addressable + - phantom_stream->timing.v_front_porch + - phantom_stream->timing.v_sync_width + - phantom_bp; -} - static struct dc_stream_state *dcn32_enable_phantom_stream(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, @@ -1938,7 +1851,9 @@ static struct dc_stream_state *dcn32_enable_phantom_stream(struct dc *dc, memcpy(&phantom_stream->timing, &ref_pipe->stream->timing, sizeof(phantom_stream->timing)); memcpy(&phantom_stream->src, &ref_pipe->stream->src, sizeof(phantom_stream->src)); memcpy(&phantom_stream->dst, &ref_pipe->stream->dst, sizeof(phantom_stream->dst)); + DC_FP_START(); dcn32_set_phantom_stream_timing(dc, context, ref_pipe, phantom_stream, pipes, pipe_cnt, dc_pipe_idx); + DC_FP_END(); dc_add_stream_to_ctx(dc, context, phantom_stream); return phantom_stream; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 4223a9a9dd45..74ccf453349c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -377,3 +377,87 @@ void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table, } } +/** + * dcn32_set_phantom_stream_timing: Set timing params for the phantom stream + * + * Set timing params of the phantom stream based on calculated output from DML. + * This function first gets the DML pipe index using the DC pipe index, then + * calls into DML (get_subviewport_lines_needed_in_mall) to get the number of + * lines required for SubVP MCLK switching and assigns to the phantom stream + * accordingly. 
+ * + * - The number of SubVP lines calculated in DML does not take into account + * FW processing delays and required pstate allow width, so we must include + * that separately. + * + * - Set phantom backporch = vstartup of main pipe + * + * @dc: current dc state + * @context: new dc state + * @ref_pipe: Main pipe for the phantom stream + * @pipes: DML pipe params + * @pipe_cnt: number of DML pipes + * @dc_pipe_idx: DC pipe index for the main pipe (i.e. ref_pipe) + */ +void dcn32_set_phantom_stream_timing(struct dc *dc, + struct dc_state *context, + struct pipe_ctx *ref_pipe, + struct dc_stream_state *phantom_stream, + display_e2e_pipe_params_st *pipes, + unsigned int pipe_cnt, + unsigned int dc_pipe_idx) +{ + unsigned int i, pipe_idx; + struct pipe_ctx *pipe; + uint32_t phantom_vactive, phantom_bp, pstate_width_fw_delay_lines; + unsigned int vlevel = context->bw_ctx.dml.vba.VoltageLevel; + unsigned int dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; + unsigned int socclk = context->bw_ctx.dml.vba.SOCCLKPerState[vlevel]; + + dc_assert_fp_enabled(); + + // Find DML pipe index (pipe_idx) using dc_pipe_idx + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + pipe = &context->res_ctx.pipe_ctx[i]; + + if (!pipe->stream) + continue; + + if (i == dc_pipe_idx) + break; + + pipe_idx++; + } + + // Calculate lines required for pstate allow width and FW processing delays + pstate_width_fw_delay_lines = ((double)(dc->caps.subvp_fw_processing_delay_us + + dc->caps.subvp_pstate_allow_width_us) / 1000000) * + (ref_pipe->stream->timing.pix_clk_100hz * 100) / + (double)ref_pipe->stream->timing.h_total; + + // Update clks_cfg for calling into recalculate + pipes[0].clks_cfg.voltage = vlevel; + pipes[0].clks_cfg.dcfclk_mhz = dcfclk; + pipes[0].clks_cfg.socclk_mhz = socclk; + + // DML calculation for MALL region doesn't take into account FW delay + // and required pstate allow width for multi-display cases + phantom_vactive = 
get_subviewport_lines_needed_in_mall(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx) + + pstate_width_fw_delay_lines; + + // For backporch of phantom pipe, use vstartup of the main pipe + phantom_bp = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + + phantom_stream->dst.y = 0; + phantom_stream->dst.height = phantom_vactive; + phantom_stream->src.y = 0; + phantom_stream->src.height = phantom_vactive; + + phantom_stream->timing.v_addressable = phantom_vactive; + phantom_stream->timing.v_front_porch = 1; + phantom_stream->timing.v_total = phantom_stream->timing.v_addressable + + phantom_stream->timing.v_front_porch + + phantom_stream->timing.v_sync_width + + phantom_bp; +} + diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h index 62cb0c1d462c..4abef908dca9 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h @@ -49,4 +49,12 @@ void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries, struct _vcs_dpi_voltage_scaling_st *entry); +void dcn32_set_phantom_stream_timing(struct dc *dc, + struct dc_state *context, + struct pipe_ctx *ref_pipe, + struct dc_stream_state *phantom_stream, + display_e2e_pipe_params_st *pipes, + unsigned int pipe_cnt, + unsigned int dc_pipe_idx); + #endif -- cgit v1.2.3 From 4cef2269318cdf5beb70c257f7aaba267f2157ae Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Fri, 8 Jul 2022 10:51:01 -0400 Subject: drm/amd/display: Move SubVP functions to dcn32_fpu It looks like many of the code related to SubVP uses FPU operation, and we have many static functions that are part of this feature. This commit is a little bit large, but it only moves SubVP operation from one file to another, and I had to do it in a single change due to dependencies between functions. 
Tested-by: Daniel Wheeler Reviewed-by: Harry Wentland Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 597 +-------------------- .../gpu/drm/amd/display/dc/dcn32/dcn32_resource.h | 2 + .../amd/display/dc/dcn32/dcn32_resource_helpers.c | 11 + .../gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 597 +++++++++++++++++++++ .../gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h | 8 + 5 files changed, 620 insertions(+), 595 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index aee66c9ffcb0..fb48293fa04e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -1624,171 +1624,6 @@ bool dcn32_release_post_bldn_3dlut( return ret; } -/** - ******************************************************************************************** - * dcn32_get_num_free_pipes: Calculate number of free pipes - * - * This function assumes that a "used" pipe is a pipe that has - * both a stream and a plane assigned to it. 
- * - * @param [in] dc: current dc state - * @param [in] context: new dc state - * - * @return: Number of free pipes available in the context - * - ******************************************************************************************** - */ -static unsigned int dcn32_get_num_free_pipes(struct dc *dc, struct dc_state *context) -{ - unsigned int i; - unsigned int free_pipes = 0; - unsigned int num_pipes = 0; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (pipe->stream && !pipe->top_pipe) { - while (pipe) { - num_pipes++; - pipe = pipe->bottom_pipe; - } - } - } - - free_pipes = dc->res_pool->pipe_count - num_pipes; - return free_pipes; -} - -/** - ******************************************************************************************** - * dcn32_assign_subvp_pipe: Function to decide which pipe will use Sub-VP. - * - * We enter this function if we are Sub-VP capable (i.e. enough pipes available) - * and regular P-State switching (i.e. VACTIVE/VBLANK) is not supported, or if - * we are forcing SubVP P-State switching on the current config. - * - * The number of pipes used for the chosen surface must be less than or equal to the - * number of free pipes available. - * - * In general we choose surfaces with the longest frame time first (better for SubVP + VBLANK). - * For multi-display cases the ActiveDRAMClockChangeMargin doesn't provide enough info on its own - * for determining which should be the SubVP pipe (need a way to determine if a pipe / plane doesn't - * support MCLK switching naturally [i.e. ACTIVE or VBLANK]). - * - * @param [in] dc: current dc state - * @param [in] context: new dc state - * @param [out] index: dc pipe index for the pipe chosen to have phantom pipes assigned - * - * @return: True if a valid pipe assignment was found for Sub-VP. Otherwise false. 
- * - ******************************************************************************************** - */ - -static bool dcn32_assign_subvp_pipe(struct dc *dc, - struct dc_state *context, - unsigned int *index) -{ - unsigned int i, pipe_idx; - unsigned int max_frame_time = 0; - bool valid_assignment_found = false; - unsigned int free_pipes = dcn32_get_num_free_pipes(dc, context); - bool current_assignment_freesync = false; - - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - unsigned int num_pipes = 0; - - if (!pipe->stream) - continue; - - if (pipe->plane_state && !pipe->top_pipe && - pipe->stream->mall_stream_config.type == SUBVP_NONE) { - while (pipe) { - num_pipes++; - pipe = pipe->bottom_pipe; - } - - pipe = &context->res_ctx.pipe_ctx[i]; - if (num_pipes <= free_pipes) { - struct dc_stream_state *stream = pipe->stream; - unsigned int frame_us = (stream->timing.v_total * stream->timing.h_total / - (double)(stream->timing.pix_clk_100hz * 100)) * 1000000; - if (frame_us > max_frame_time && !stream->ignore_msa_timing_param) { - *index = i; - max_frame_time = frame_us; - valid_assignment_found = true; - current_assignment_freesync = false; - /* For the 2-Freesync display case, still choose the one with the - * longest frame time - */ - } else if (stream->ignore_msa_timing_param && (!valid_assignment_found || - (current_assignment_freesync && frame_us > max_frame_time))) { - *index = i; - valid_assignment_found = true; - current_assignment_freesync = true; - } - } - } - pipe_idx++; - } - return valid_assignment_found; -} - -/** - * *************************************************************************************** - * dcn32_enough_pipes_for_subvp: Function to check if there are "enough" pipes for SubVP. - * - * This function returns true if there are enough free pipes - * to create the required phantom pipes for any given stream - * (that does not already have phantom pipe assigned). 
- * - * e.g. For a 2 stream config where the first stream uses one - * pipe and the second stream uses 2 pipes (i.e. pipe split), - * this function will return true because there is 1 remaining - * pipe which can be used as the phantom pipe for the non pipe - * split pipe. - * - * @param [in] dc: current dc state - * @param [in] context: new dc state - * - * @return: True if there are enough free pipes to assign phantom pipes to at least one - * stream that does not already have phantom pipes assigned. Otherwise false. - * - * *************************************************************************************** - */ -static bool dcn32_enough_pipes_for_subvp(struct dc *dc, struct dc_state *context) -{ - unsigned int i, split_cnt, free_pipes; - unsigned int min_pipe_split = dc->res_pool->pipe_count + 1; // init as max number of pipes + 1 - bool subvp_possible = false; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - // Find the minimum pipe split count for non SubVP pipes - if (pipe->stream && !pipe->top_pipe && - pipe->stream->mall_stream_config.type == SUBVP_NONE) { - split_cnt = 0; - while (pipe) { - split_cnt++; - pipe = pipe->bottom_pipe; - } - - if (split_cnt < min_pipe_split) - min_pipe_split = split_cnt; - } - } - - free_pipes = dcn32_get_num_free_pipes(dc, context); - - // SubVP only possible if at least one pipe is being used (i.e. 
free_pipes - // should not equal to the pipe_count) - if (free_pipes >= min_pipe_split && free_pipes < dc->res_pool->pipe_count) - subvp_possible = true; - - return subvp_possible; -} - static void dcn32_enable_phantom_plane(struct dc *dc, struct dc_state *context, struct dc_stream_state *phantom_stream, @@ -2033,436 +1868,6 @@ static struct pipe_ctx *dcn32_find_split_pipe( return pipe; } - -/** - * *************************************************************************************** - * subvp_subvp_schedulable: Determine if SubVP + SubVP config is schedulable - * - * High level algorithm: - * 1. Find longest microschedule length (in us) between the two SubVP pipes - * 2. Check if the worst case overlap (VBLANK in middle of ACTIVE) for both - * pipes still allows for the maximum microschedule to fit in the active - * region for both pipes. - * - * @param [in] dc: current dc state - * @param [in] context: new dc state - * - * @return: bool - True if the SubVP + SubVP config is schedulable, false otherwise - * - * *************************************************************************************** - */ -static bool subvp_subvp_schedulable(struct dc *dc, struct dc_state *context) -{ - struct pipe_ctx *subvp_pipes[2]; - struct dc_stream_state *phantom = NULL; - uint32_t microschedule_lines = 0; - uint32_t index = 0; - uint32_t i; - uint32_t max_microschedule_us = 0; - int32_t vactive1_us, vactive2_us, vblank1_us, vblank2_us; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - uint32_t time_us = 0; - - /* Loop to calculate the maximum microschedule time between the two SubVP pipes, - * and also to store the two main SubVP pipe pointers in subvp_pipes[2]. 
- */ - if (pipe->stream && pipe->plane_state && !pipe->top_pipe && - pipe->stream->mall_stream_config.type == SUBVP_MAIN) { - phantom = pipe->stream->mall_stream_config.paired_stream; - microschedule_lines = (phantom->timing.v_total - phantom->timing.v_front_porch) + - phantom->timing.v_addressable; - - // Round up when calculating microschedule time (+ 1 at the end) - time_us = (microschedule_lines * phantom->timing.h_total) / - (double)(phantom->timing.pix_clk_100hz * 100) * 1000000 + - dc->caps.subvp_prefetch_end_to_mall_start_us + - dc->caps.subvp_fw_processing_delay_us + 1; - if (time_us > max_microschedule_us) - max_microschedule_us = time_us; - - subvp_pipes[index] = pipe; - index++; - - // Maximum 2 SubVP pipes - if (index == 2) - break; - } - } - vactive1_us = ((subvp_pipes[0]->stream->timing.v_addressable * subvp_pipes[0]->stream->timing.h_total) / - (double)(subvp_pipes[0]->stream->timing.pix_clk_100hz * 100)) * 1000000; - vactive2_us = ((subvp_pipes[1]->stream->timing.v_addressable * subvp_pipes[1]->stream->timing.h_total) / - (double)(subvp_pipes[1]->stream->timing.pix_clk_100hz * 100)) * 1000000; - vblank1_us = (((subvp_pipes[0]->stream->timing.v_total - subvp_pipes[0]->stream->timing.v_addressable) * - subvp_pipes[0]->stream->timing.h_total) / - (double)(subvp_pipes[0]->stream->timing.pix_clk_100hz * 100)) * 1000000; - vblank2_us = (((subvp_pipes[1]->stream->timing.v_total - subvp_pipes[1]->stream->timing.v_addressable) * - subvp_pipes[1]->stream->timing.h_total) / - (double)(subvp_pipes[1]->stream->timing.pix_clk_100hz * 100)) * 1000000; - - if ((vactive1_us - vblank2_us) / 2 > max_microschedule_us && - (vactive2_us - vblank1_us) / 2 > max_microschedule_us) - return true; - - return false; -} - -/** - * *************************************************************************************** - * subvp_drr_schedulable: Determine if SubVP + DRR config is schedulable - * - * High level algorithm: - * 1. 
Get timing for SubVP pipe, phantom pipe, and DRR pipe - * 2. Determine the frame time for the DRR display when adding required margin for MCLK switching - * (the margin is equal to the MALL region + DRR margin (500us)) - * 3.If (SubVP Active - Prefetch > Stretched DRR frame + max(MALL region, Stretched DRR frame)) - * then report the configuration as supported - * - * @param [in] dc: current dc state - * @param [in] context: new dc state - * @param [in] drr_pipe: DRR pipe_ctx for the SubVP + DRR config - * - * @return: bool - True if the SubVP + DRR config is schedulable, false otherwise - * - * *************************************************************************************** - */ -static bool subvp_drr_schedulable(struct dc *dc, struct dc_state *context, struct pipe_ctx *drr_pipe) -{ - bool schedulable = false; - uint32_t i; - struct pipe_ctx *pipe = NULL; - struct dc_crtc_timing *main_timing = NULL; - struct dc_crtc_timing *phantom_timing = NULL; - struct dc_crtc_timing *drr_timing = NULL; - int16_t prefetch_us = 0; - int16_t mall_region_us = 0; - int16_t drr_frame_us = 0; // nominal frame time - int16_t subvp_active_us = 0; - int16_t stretched_drr_us = 0; - int16_t drr_stretched_vblank_us = 0; - int16_t max_vblank_mallregion = 0; - - // Find SubVP pipe - for (i = 0; i < dc->res_pool->pipe_count; i++) { - pipe = &context->res_ctx.pipe_ctx[i]; - - // We check for master pipe, but it shouldn't matter since we only need - // the pipe for timing info (stream should be same for any pipe splits) - if (!pipe->stream || !pipe->plane_state || pipe->top_pipe || pipe->prev_odm_pipe) - continue; - - // Find the SubVP pipe - if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) - break; - } - - main_timing = &pipe->stream->timing; - phantom_timing = &pipe->stream->mall_stream_config.paired_stream->timing; - drr_timing = &drr_pipe->stream->timing; - prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total / - 
(double)(phantom_timing->pix_clk_100hz * 100) * 1000000 + - dc->caps.subvp_prefetch_end_to_mall_start_us; - subvp_active_us = main_timing->v_addressable * main_timing->h_total / - (double)(main_timing->pix_clk_100hz * 100) * 1000000; - drr_frame_us = drr_timing->v_total * drr_timing->h_total / - (double)(drr_timing->pix_clk_100hz * 100) * 1000000; - // P-State allow width and FW delays already included phantom_timing->v_addressable - mall_region_us = phantom_timing->v_addressable * phantom_timing->h_total / - (double)(phantom_timing->pix_clk_100hz * 100) * 1000000; - stretched_drr_us = drr_frame_us + mall_region_us + SUBVP_DRR_MARGIN_US; - drr_stretched_vblank_us = (drr_timing->v_total - drr_timing->v_addressable) * drr_timing->h_total / - (double)(drr_timing->pix_clk_100hz * 100) * 1000000 + (stretched_drr_us - drr_frame_us); - max_vblank_mallregion = drr_stretched_vblank_us > mall_region_us ? drr_stretched_vblank_us : mall_region_us; - - /* We consider SubVP + DRR schedulable if the stretched frame duration of the DRR display (i.e. the - * highest refresh rate + margin that can support UCLK P-State switch) passes the static analysis - * for VBLANK: (VACTIVE region of the SubVP pipe can fit the MALL prefetch, VBLANK frame time, - * and the max of (VBLANK blanking time, MALL region)). - */ - if (stretched_drr_us < (1 / (double)drr_timing->min_refresh_in_uhz) * 1000000 * 1000000 && - subvp_active_us - prefetch_us - stretched_drr_us - max_vblank_mallregion > 0) - schedulable = true; - - return schedulable; -} - -/** - * *************************************************************************************** - * subvp_vblank_schedulable: Determine if SubVP + VBLANK config is schedulable - * - * High level algorithm: - * 1. Get timing for SubVP pipe, phantom pipe, and VBLANK pipe - * 2. If (SubVP Active - Prefetch > Vblank Frame Time + max(MALL region, Vblank blanking time)) - * then report the configuration as supported - * 3. 
If the VBLANK display is DRR, then take the DRR static schedulability path - * - * @param [in] dc: current dc state - * @param [in] context: new dc state - * - * @return: bool - True if the SubVP + VBLANK/DRR config is schedulable, false otherwise - * - * *************************************************************************************** - */ -static bool subvp_vblank_schedulable(struct dc *dc, struct dc_state *context) -{ - struct pipe_ctx *pipe = NULL; - struct pipe_ctx *subvp_pipe = NULL; - bool found = false; - bool schedulable = false; - uint32_t i = 0; - uint8_t vblank_index = 0; - uint16_t prefetch_us = 0; - uint16_t mall_region_us = 0; - uint16_t vblank_frame_us = 0; - uint16_t subvp_active_us = 0; - uint16_t vblank_blank_us = 0; - uint16_t max_vblank_mallregion = 0; - struct dc_crtc_timing *main_timing = NULL; - struct dc_crtc_timing *phantom_timing = NULL; - struct dc_crtc_timing *vblank_timing = NULL; - - /* For SubVP + VBLANK/DRR cases, we assume there can only be - * a single VBLANK/DRR display. If DML outputs SubVP + VBLANK - * is supported, it is either a single VBLANK case or two VBLANK - * displays which are synchronized (in which case they have identical - * timings). - */ - for (i = 0; i < dc->res_pool->pipe_count; i++) { - pipe = &context->res_ctx.pipe_ctx[i]; - - // We check for master pipe, but it shouldn't matter since we only need - // the pipe for timing info (stream should be same for any pipe splits) - if (!pipe->stream || !pipe->plane_state || pipe->top_pipe || pipe->prev_odm_pipe) - continue; - - if (!found && pipe->stream->mall_stream_config.type == SUBVP_NONE) { - // Found pipe which is not SubVP or Phantom (i.e. the VBLANK pipe). 
- vblank_index = i; - found = true; - } - - if (!subvp_pipe && pipe->stream->mall_stream_config.type == SUBVP_MAIN) - subvp_pipe = pipe; - } - // Use ignore_msa_timing_param flag to identify as DRR - if (found && context->res_ctx.pipe_ctx[vblank_index].stream->ignore_msa_timing_param) { - // SUBVP + DRR case - schedulable = subvp_drr_schedulable(dc, context, &context->res_ctx.pipe_ctx[vblank_index]); - } else if (found) { - main_timing = &subvp_pipe->stream->timing; - phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing; - vblank_timing = &context->res_ctx.pipe_ctx[vblank_index].stream->timing; - // Prefetch time is equal to VACTIVE + BP + VSYNC of the phantom pipe - // Also include the prefetch end to mallstart delay time - prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total / - (double)(phantom_timing->pix_clk_100hz * 100) * 1000000 + - dc->caps.subvp_prefetch_end_to_mall_start_us; - // P-State allow width and FW delays already included phantom_timing->v_addressable - mall_region_us = phantom_timing->v_addressable * phantom_timing->h_total / - (double)(phantom_timing->pix_clk_100hz * 100) * 1000000; - vblank_frame_us = vblank_timing->v_total * vblank_timing->h_total / - (double)(vblank_timing->pix_clk_100hz * 100) * 1000000; - vblank_blank_us = (vblank_timing->v_total - vblank_timing->v_addressable) * vblank_timing->h_total / - (double)(vblank_timing->pix_clk_100hz * 100) * 1000000; - subvp_active_us = main_timing->v_addressable * main_timing->h_total / - (double)(main_timing->pix_clk_100hz * 100) * 1000000; - max_vblank_mallregion = vblank_blank_us > mall_region_us ? vblank_blank_us : mall_region_us; - - // Schedulable if VACTIVE region of the SubVP pipe can fit the MALL prefetch, VBLANK frame time, - // and the max of (VBLANK blanking time, MALL region) - // TODO: Possibly add some margin (i.e. the below conditions should be [...] > X instead of [...] 
> 0) - if (subvp_active_us - prefetch_us - vblank_frame_us - max_vblank_mallregion > 0) - schedulable = true; - } - return schedulable; -} - -/** - * ******************************************************************************************** - * subvp_validate_static_schedulability: Check which SubVP case is calculated and handle - * static analysis based on the case. - * - * Three cases: - * 1. SubVP + SubVP - * 2. SubVP + VBLANK (DRR checked internally) - * 3. SubVP + VACTIVE (currently unsupported) - * - * @param [in] dc: current dc state - * @param [in] context: new dc state - * @param [in] vlevel: Voltage level calculated by DML - * - * @return: bool - True if statically schedulable, false otherwise - * - * ******************************************************************************************** - */ -static bool subvp_validate_static_schedulability(struct dc *dc, - struct dc_state *context, - int vlevel) -{ - bool schedulable = true; // true by default for single display case - struct vba_vars_st *vba = &context->bw_ctx.dml.vba; - uint32_t i, pipe_idx; - uint8_t subvp_count = 0; - uint8_t vactive_count = 0; - - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (!pipe->stream) - continue; - - if (pipe->plane_state && !pipe->top_pipe && - pipe->stream->mall_stream_config.type == SUBVP_MAIN) - subvp_count++; - - // Count how many planes are capable of VACTIVE switching (SubVP + VACTIVE unsupported) - if (vba->ActiveDRAMClockChangeLatencyMargin[vba->pipe_plane[pipe_idx]] > 0) { - vactive_count++; - } - pipe_idx++; - } - - if (subvp_count == 2) { - // Static schedulability check for SubVP + SubVP case - schedulable = subvp_subvp_schedulable(dc, context); - } else if (vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_vblank_w_mall_sub_vp) { - // Static schedulability check for SubVP + VBLANK case. 
Also handle the case where - // DML outputs SubVP + VBLANK + VACTIVE (DML will report as SubVP + VBLANK) - if (vactive_count > 0) - schedulable = false; - else - schedulable = subvp_vblank_schedulable(dc, context); - } else if (vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_vactive_w_mall_sub_vp) { - // SubVP + VACTIVE currently unsupported - schedulable = false; - } - return schedulable; -} - -static void dcn32_full_validate_bw_helper(struct dc *dc, - struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int *vlevel, - int *split, - bool *merge, - int *pipe_cnt) -{ - struct vba_vars_st *vba = &context->bw_ctx.dml.vba; - unsigned int dc_pipe_idx = 0; - bool found_supported_config = false; - struct pipe_ctx *pipe = NULL; - uint32_t non_subvp_pipes = 0; - bool drr_pipe_found = false; - uint32_t drr_pipe_index = 0; - uint32_t i = 0; - - /* - * DML favors voltage over p-state, but we're more interested in - * supporting p-state over voltage. We can't support p-state in - * prefetch mode > 0 so try capping the prefetch mode to start. - */ - context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = - dm_prefetch_support_uclk_fclk_and_stutter; - *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt); - /* This may adjust vlevel and maxMpcComb */ - if (*vlevel < context->bw_ctx.dml.soc.num_states) - *vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, *vlevel, split, merge); - - /* Conditions for setting up phantom pipes for SubVP: - * 1. Not force disable SubVP - * 2. Full update (i.e. !fast_validate) - * 3. Enough pipes are available to support SubVP (TODO: Which pipes will use VACTIVE / VBLANK / SUBVP?) - * 4. Display configuration passes validation - * 5. 
(Config doesn't support MCLK in VACTIVE/VBLANK || dc->debug.force_subvp_mclk_switch) - */ - if (!dc->debug.force_disable_subvp && dcn32_all_pipes_have_stream_and_plane(dc, context) && - (*vlevel == context->bw_ctx.dml.soc.num_states || - vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported || - dc->debug.force_subvp_mclk_switch)) { - - dcn32_merge_pipes_for_subvp(dc, context); - - while (!found_supported_config && dcn32_enough_pipes_for_subvp(dc, context) && - dcn32_assign_subvp_pipe(dc, context, &dc_pipe_idx)) { - - /* For the case where *vlevel = num_states, bandwidth validation has failed for this config. - * Adding phantom pipes won't change the validation result, so change the DML input param - * for P-State support before adding phantom pipes and recalculating the DML result. - * However, this case is only applicable for SubVP + DRR cases because the prefetch mode - * will not allow for switch in VBLANK. The DRR display must have it's VBLANK stretched - * enough to support support MCLK switching. - */ - if (*vlevel == context->bw_ctx.dml.soc.num_states) { - context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = - dm_prefetch_support_stutter; - /* There are params (such as FabricClock) that need to be recalculated - * after validation fails (otherwise it will be 0). Calculation for - * phantom vactive requires call into DML, so we must ensure all the - * vba params are valid otherwise we'll get incorrect phantom vactive. 
- */ - *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt); - } - - dc->res_pool->funcs->add_phantom_pipes(dc, context, pipes, *pipe_cnt, dc_pipe_idx); - - *pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, false); - *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt); - - if (*vlevel < context->bw_ctx.dml.soc.num_states && - vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] != dm_dram_clock_change_unsupported - && subvp_validate_static_schedulability(dc, context, *vlevel)) { - found_supported_config = true; - } else if (*vlevel < context->bw_ctx.dml.soc.num_states && - vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported) { - /* Case where 1 SubVP is added, and DML reports MCLK unsupported. This handles - * the case for SubVP + DRR, where the DRR display does not support MCLK switch - * at it's native refresh rate / timing. - */ - for (i = 0; i < dc->res_pool->pipe_count; i++) { - pipe = &context->res_ctx.pipe_ctx[i]; - if (pipe->stream && pipe->plane_state && !pipe->top_pipe && - pipe->stream->mall_stream_config.type == SUBVP_NONE) { - non_subvp_pipes++; - // Use ignore_msa_timing_param flag to identify as DRR - if (pipe->stream->ignore_msa_timing_param) { - drr_pipe_found = true; - drr_pipe_index = i; - } - } - } - // If there is only 1 remaining non SubVP pipe that is DRR, check static - // schedulability for SubVP + DRR. 
- if (non_subvp_pipes == 1 && drr_pipe_found) { - found_supported_config = subvp_drr_schedulable(dc, - context, &context->res_ctx.pipe_ctx[drr_pipe_index]); - } - } - } - - // If SubVP pipe config is unsupported (or cannot be used for UCLK switching) - // remove phantom pipes and repopulate dml pipes - if (!found_supported_config) { - dc->res_pool->funcs->remove_phantom_pipes(dc, context); - vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] = dm_dram_clock_change_unsupported; - *pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, false); - } else { - // only call dcn20_validate_apply_pipe_split_flags if we found a supported config - memset(split, 0, MAX_PIPES * sizeof(int)); - memset(merge, 0, MAX_PIPES * sizeof(bool)); - *vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, *vlevel, split, merge); - - // Most populate phantom DLG params before programming hardware / timing for phantom pipe - DC_FP_START(); - dcn32_helper_populate_phantom_dlg_params(dc, context, pipes, *pipe_cnt); - DC_FP_END(); - - // Note: We can't apply the phantom pipes to hardware at this time. We have to wait - // until driver has acquired the DMCUB lock to do it safely. 
- } - } -} - static bool dcn32_internal_validate_bw( struct dc *dc, struct dc_state *context, @@ -2498,7 +1903,9 @@ static bool dcn32_internal_validate_bw( dml_log_pipe_params(&context->bw_ctx.dml, pipes, pipe_cnt); if (!fast_validate) { + DC_FP_START(); dcn32_full_validate_bw_helper(dc, context, pipes, &vlevel, split, merge, &pipe_cnt); + DC_FP_END(); } if (fast_validate || vlevel == context->bw_ctx.dml.soc.num_states || diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h index 7ccad84b1f16..37d37067e983 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h @@ -103,6 +103,8 @@ bool dcn32_all_pipes_have_stream_and_plane(struct dc *dc, bool dcn32_subvp_in_use(struct dc *dc, struct dc_state *context); +bool dcn32_mpo_in_use(struct dc_state *context); + void dcn32_determine_det_override(struct dc_state *context, display_e2e_pipe_params_st *pipes, bool *is_pipe_split_expected, int pipe_cnt); diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index 796e3d966a76..47caa2c6d5b4 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -153,6 +153,17 @@ bool dcn32_subvp_in_use(struct dc *dc, return false; } +bool dcn32_mpo_in_use(struct dc_state *context) +{ + uint32_t i; + + for (i = 0; i < context->stream_count; i++) { + if (context->stream_status[i].plane_count > 1) + return true; + } + return false; +} + void dcn32_determine_det_override(struct dc_state *context, display_e2e_pipe_params_st *pipes, bool *is_pipe_split_expected, int pipe_cnt) { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 74ccf453349c..82d801933aec 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -24,6 +24,8 @@ * */ #include "dcn32_fpu.h" +#include "dcn32/dcn32_resource.h" +#include "dcn20/dcn20_resource.h" #include "display_mode_vba_util_32.h" // We need this includes for WATERMARKS_* defines #include "clk_mgr/dcn32/dcn32_smu13_driver_if.h" @@ -461,3 +463,598 @@ void dcn32_set_phantom_stream_timing(struct dc *dc, phantom_bp; } +/** + * dcn32_get_num_free_pipes: Calculate number of free pipes + * + * This function assumes that a "used" pipe is a pipe that has + * both a stream and a plane assigned to it. + * + * @dc: current dc state + * @context: new dc state + * + * Return: + * Number of free pipes available in the context + */ +static unsigned int dcn32_get_num_free_pipes(struct dc *dc, struct dc_state *context) +{ + unsigned int i; + unsigned int free_pipes = 0; + unsigned int num_pipes = 0; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (pipe->stream && !pipe->top_pipe) { + while (pipe) { + num_pipes++; + pipe = pipe->bottom_pipe; + } + } + } + + free_pipes = dc->res_pool->pipe_count - num_pipes; + return free_pipes; +} + +/** + * dcn32_assign_subvp_pipe: Function to decide which pipe will use Sub-VP. + * + * We enter this function if we are Sub-VP capable (i.e. enough pipes available) + * and regular P-State switching (i.e. VACTIVE/VBLANK) is not supported, or if + * we are forcing SubVP P-State switching on the current config. + * + * The number of pipes used for the chosen surface must be less than or equal to the + * number of free pipes available. + * + * In general we choose surfaces with the longest frame time first (better for SubVP + VBLANK). 
+ * For multi-display cases the ActiveDRAMClockChangeMargin doesn't provide enough info on its own + * for determining which should be the SubVP pipe (need a way to determine if a pipe / plane doesn't + * support MCLK switching naturally [i.e. ACTIVE or VBLANK]). + * + * @param dc: current dc state + * @param context: new dc state + * @param index: [out] dc pipe index for the pipe chosen to have phantom pipes assigned + * + * Return: + * True if a valid pipe assignment was found for Sub-VP. Otherwise false. + */ +static bool dcn32_assign_subvp_pipe(struct dc *dc, + struct dc_state *context, + unsigned int *index) +{ + unsigned int i, pipe_idx; + unsigned int max_frame_time = 0; + bool valid_assignment_found = false; + unsigned int free_pipes = dcn32_get_num_free_pipes(dc, context); + bool current_assignment_freesync = false; + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + unsigned int num_pipes = 0; + unsigned int refresh_rate = 0; + + if (!pipe->stream) + continue; + + // Round up + refresh_rate = (pipe->stream->timing.pix_clk_100hz * 100 + + pipe->stream->timing.v_total * pipe->stream->timing.h_total - 1) + / (double)(pipe->stream->timing.v_total * pipe->stream->timing.h_total); + if (pipe->plane_state && !pipe->top_pipe && + pipe->stream->mall_stream_config.type == SUBVP_NONE && refresh_rate < 120) { + while (pipe) { + num_pipes++; + pipe = pipe->bottom_pipe; + } + + pipe = &context->res_ctx.pipe_ctx[i]; + if (num_pipes <= free_pipes) { + struct dc_stream_state *stream = pipe->stream; + unsigned int frame_us = (stream->timing.v_total * stream->timing.h_total / + (double)(stream->timing.pix_clk_100hz * 100)) * 1000000; + if (frame_us > max_frame_time && !stream->ignore_msa_timing_param) { + *index = i; + max_frame_time = frame_us; + valid_assignment_found = true; + current_assignment_freesync = false; + /* For the 2-Freesync display case, still choose the one with the + * longest frame 
time + */ + } else if (stream->ignore_msa_timing_param && (!valid_assignment_found || + (current_assignment_freesync && frame_us > max_frame_time))) { + *index = i; + valid_assignment_found = true; + current_assignment_freesync = true; + } + } + } + pipe_idx++; + } + return valid_assignment_found; +} + +/** + * dcn32_enough_pipes_for_subvp: Function to check if there are "enough" pipes for SubVP. + * + * This function returns true if there are enough free pipes + * to create the required phantom pipes for any given stream + * (that does not already have phantom pipe assigned). + * + * e.g. For a 2 stream config where the first stream uses one + * pipe and the second stream uses 2 pipes (i.e. pipe split), + * this function will return true because there is 1 remaining + * pipe which can be used as the phantom pipe for the non pipe + * split pipe. + * + * @dc: current dc state + * @context: new dc state + * + * Return: + * True if there are enough free pipes to assign phantom pipes to at least one + * stream that does not already have phantom pipes assigned. Otherwise false. + */ +static bool dcn32_enough_pipes_for_subvp(struct dc *dc, struct dc_state *context) +{ + unsigned int i, split_cnt, free_pipes; + unsigned int min_pipe_split = dc->res_pool->pipe_count + 1; // init as max number of pipes + 1 + bool subvp_possible = false; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + // Find the minimum pipe split count for non SubVP pipes + if (pipe->stream && !pipe->top_pipe && + pipe->stream->mall_stream_config.type == SUBVP_NONE) { + split_cnt = 0; + while (pipe) { + split_cnt++; + pipe = pipe->bottom_pipe; + } + + if (split_cnt < min_pipe_split) + min_pipe_split = split_cnt; + } + } + + free_pipes = dcn32_get_num_free_pipes(dc, context); + + // SubVP only possible if at least one pipe is being used (i.e. 
free_pipes + // should not equal to the pipe_count) + if (free_pipes >= min_pipe_split && free_pipes < dc->res_pool->pipe_count) + subvp_possible = true; + + return subvp_possible; +} + +/** + * subvp_subvp_schedulable: Determine if SubVP + SubVP config is schedulable + * + * High level algorithm: + * 1. Find longest microschedule length (in us) between the two SubVP pipes + * 2. Check if the worst case overlap (VBLANK in middle of ACTIVE) for both + * pipes still allows for the maximum microschedule to fit in the active + * region for both pipes. + * + * @dc: current dc state + * @context: new dc state + * + * Return: + * bool - True if the SubVP + SubVP config is schedulable, false otherwise + */ +static bool subvp_subvp_schedulable(struct dc *dc, struct dc_state *context) +{ + struct pipe_ctx *subvp_pipes[2]; + struct dc_stream_state *phantom = NULL; + uint32_t microschedule_lines = 0; + uint32_t index = 0; + uint32_t i; + uint32_t max_microschedule_us = 0; + int32_t vactive1_us, vactive2_us, vblank1_us, vblank2_us; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + uint32_t time_us = 0; + + /* Loop to calculate the maximum microschedule time between the two SubVP pipes, + * and also to store the two main SubVP pipe pointers in subvp_pipes[2]. 
+ */ + if (pipe->stream && pipe->plane_state && !pipe->top_pipe && + pipe->stream->mall_stream_config.type == SUBVP_MAIN) { + phantom = pipe->stream->mall_stream_config.paired_stream; + microschedule_lines = (phantom->timing.v_total - phantom->timing.v_front_porch) + + phantom->timing.v_addressable; + + // Round up when calculating microschedule time (+ 1 at the end) + time_us = (microschedule_lines * phantom->timing.h_total) / + (double)(phantom->timing.pix_clk_100hz * 100) * 1000000 + + dc->caps.subvp_prefetch_end_to_mall_start_us + + dc->caps.subvp_fw_processing_delay_us + 1; + if (time_us > max_microschedule_us) + max_microschedule_us = time_us; + + subvp_pipes[index] = pipe; + index++; + + // Maximum 2 SubVP pipes + if (index == 2) + break; + } + } + vactive1_us = ((subvp_pipes[0]->stream->timing.v_addressable * subvp_pipes[0]->stream->timing.h_total) / + (double)(subvp_pipes[0]->stream->timing.pix_clk_100hz * 100)) * 1000000; + vactive2_us = ((subvp_pipes[1]->stream->timing.v_addressable * subvp_pipes[1]->stream->timing.h_total) / + (double)(subvp_pipes[1]->stream->timing.pix_clk_100hz * 100)) * 1000000; + vblank1_us = (((subvp_pipes[0]->stream->timing.v_total - subvp_pipes[0]->stream->timing.v_addressable) * + subvp_pipes[0]->stream->timing.h_total) / + (double)(subvp_pipes[0]->stream->timing.pix_clk_100hz * 100)) * 1000000; + vblank2_us = (((subvp_pipes[1]->stream->timing.v_total - subvp_pipes[1]->stream->timing.v_addressable) * + subvp_pipes[1]->stream->timing.h_total) / + (double)(subvp_pipes[1]->stream->timing.pix_clk_100hz * 100)) * 1000000; + + if ((vactive1_us - vblank2_us) / 2 > max_microschedule_us && + (vactive2_us - vblank1_us) / 2 > max_microschedule_us) + return true; + + return false; +} + +/** + * subvp_drr_schedulable: Determine if SubVP + DRR config is schedulable + * + * High level algorithm: + * 1. Get timing for SubVP pipe, phantom pipe, and DRR pipe + * 2. 
Determine the frame time for the DRR display when adding required margin for MCLK switching + * (the margin is equal to the MALL region + DRR margin (500us)) + * 3.If (SubVP Active - Prefetch > Stretched DRR frame + max(MALL region, Stretched DRR frame)) + * then report the configuration as supported + * + * @dc: current dc state + * @context: new dc state + * @drr_pipe: DRR pipe_ctx for the SubVP + DRR config + * + * Return: + * bool - True if the SubVP + DRR config is schedulable, false otherwise + */ +static bool subvp_drr_schedulable(struct dc *dc, struct dc_state *context, struct pipe_ctx *drr_pipe) +{ + bool schedulable = false; + uint32_t i; + struct pipe_ctx *pipe = NULL; + struct dc_crtc_timing *main_timing = NULL; + struct dc_crtc_timing *phantom_timing = NULL; + struct dc_crtc_timing *drr_timing = NULL; + int16_t prefetch_us = 0; + int16_t mall_region_us = 0; + int16_t drr_frame_us = 0; // nominal frame time + int16_t subvp_active_us = 0; + int16_t stretched_drr_us = 0; + int16_t drr_stretched_vblank_us = 0; + int16_t max_vblank_mallregion = 0; + + // Find SubVP pipe + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe = &context->res_ctx.pipe_ctx[i]; + + // We check for master pipe, but it shouldn't matter since we only need + // the pipe for timing info (stream should be same for any pipe splits) + if (!pipe->stream || !pipe->plane_state || pipe->top_pipe || pipe->prev_odm_pipe) + continue; + + // Find the SubVP pipe + if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) + break; + } + + main_timing = &pipe->stream->timing; + phantom_timing = &pipe->stream->mall_stream_config.paired_stream->timing; + drr_timing = &drr_pipe->stream->timing; + prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total / + (double)(phantom_timing->pix_clk_100hz * 100) * 1000000 + + dc->caps.subvp_prefetch_end_to_mall_start_us; + subvp_active_us = main_timing->v_addressable * main_timing->h_total / + 
(double)(main_timing->pix_clk_100hz * 100) * 1000000; + drr_frame_us = drr_timing->v_total * drr_timing->h_total / + (double)(drr_timing->pix_clk_100hz * 100) * 1000000; + // P-State allow width and FW delays already included phantom_timing->v_addressable + mall_region_us = phantom_timing->v_addressable * phantom_timing->h_total / + (double)(phantom_timing->pix_clk_100hz * 100) * 1000000; + stretched_drr_us = drr_frame_us + mall_region_us + SUBVP_DRR_MARGIN_US; + drr_stretched_vblank_us = (drr_timing->v_total - drr_timing->v_addressable) * drr_timing->h_total / + (double)(drr_timing->pix_clk_100hz * 100) * 1000000 + (stretched_drr_us - drr_frame_us); + max_vblank_mallregion = drr_stretched_vblank_us > mall_region_us ? drr_stretched_vblank_us : mall_region_us; + + /* We consider SubVP + DRR schedulable if the stretched frame duration of the DRR display (i.e. the + * highest refresh rate + margin that can support UCLK P-State switch) passes the static analysis + * for VBLANK: (VACTIVE region of the SubVP pipe can fit the MALL prefetch, VBLANK frame time, + * and the max of (VBLANK blanking time, MALL region)). + */ + if (stretched_drr_us < (1 / (double)drr_timing->min_refresh_in_uhz) * 1000000 * 1000000 && + subvp_active_us - prefetch_us - stretched_drr_us - max_vblank_mallregion > 0) + schedulable = true; + + return schedulable; +} + + +/** + * subvp_vblank_schedulable: Determine if SubVP + VBLANK config is schedulable + * + * High level algorithm: + * 1. Get timing for SubVP pipe, phantom pipe, and VBLANK pipe + * 2. If (SubVP Active - Prefetch > Vblank Frame Time + max(MALL region, Vblank blanking time)) + * then report the configuration as supported + * 3. 
If the VBLANK display is DRR, then take the DRR static schedulability path
+ *
+ * @dc: current dc state
+ * @context: new dc state
+ *
+ * Return:
+ * bool - True if the SubVP + VBLANK/DRR config is schedulable, false otherwise
+ * (including when no VBLANK pipe, or no main SubVP pipe for the VBLANK case,
+ * is present in @context)
+ */
+static bool subvp_vblank_schedulable(struct dc *dc, struct dc_state *context)
+{
+	struct pipe_ctx *pipe = NULL;
+	struct pipe_ctx *subvp_pipe = NULL;
+	bool found = false;
+	bool schedulable = false;
+	uint32_t i = 0;
+	uint8_t vblank_index = 0;
+	/* All times are in microseconds. 32-bit signed storage avoids truncating
+	 * long frame times (16 bits only hold ~65ms) and keeps the final margin
+	 * check a signed subtraction.
+	 */
+	int32_t prefetch_us = 0;
+	int32_t mall_region_us = 0;
+	int32_t vblank_frame_us = 0;
+	int32_t subvp_active_us = 0;
+	int32_t vblank_blank_us = 0;
+	int32_t max_vblank_mallregion = 0;
+	struct dc_crtc_timing *main_timing = NULL;
+	struct dc_crtc_timing *phantom_timing = NULL;
+	struct dc_crtc_timing *vblank_timing = NULL;
+
+	/* For SubVP + VBLANK/DRR cases, we assume there can only be
+	 * a single VBLANK/DRR display. If DML outputs SubVP + VBLANK
+	 * is supported, it is either a single VBLANK case or two VBLANK
+	 * displays which are synchronized (in which case they have identical
+	 * timings).
+	 */
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		pipe = &context->res_ctx.pipe_ctx[i];
+
+		// We check for master pipe, but it shouldn't matter since we only need
+		// the pipe for timing info (stream should be same for any pipe splits)
+		if (!pipe->stream || !pipe->plane_state || pipe->top_pipe || pipe->prev_odm_pipe)
+			continue;
+
+		if (!found && pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+			// Found pipe which is not SubVP or Phantom (i.e. the VBLANK pipe).
+			vblank_index = i;
+			found = true;
+		}
+
+		if (!subvp_pipe && pipe->stream->mall_stream_config.type == SUBVP_MAIN)
+			subvp_pipe = pipe;
+	}
+	// Use ignore_msa_timing_param flag to identify as DRR
+	if (found && context->res_ctx.pipe_ctx[vblank_index].stream->ignore_msa_timing_param) {
+		// SUBVP + DRR case
+		schedulable = subvp_drr_schedulable(dc, context, &context->res_ctx.pipe_ctx[vblank_index]);
+	} else if (found && subvp_pipe) {
+		// subvp_pipe is dereferenced below, so this path requires that a main SubVP pipe was found
+		main_timing = &subvp_pipe->stream->timing;
+		phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing;
+		vblank_timing = &context->res_ctx.pipe_ctx[vblank_index].stream->timing;
+		// Prefetch time is equal to VACTIVE + BP + VSYNC of the phantom pipe
+		// Also include the prefetch end to mallstart delay time
+		prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total /
+				(double)(phantom_timing->pix_clk_100hz * 100) * 1000000 +
+				dc->caps.subvp_prefetch_end_to_mall_start_us;
+		// P-State allow width and FW delays already included phantom_timing->v_addressable
+		mall_region_us = phantom_timing->v_addressable * phantom_timing->h_total /
+				(double)(phantom_timing->pix_clk_100hz * 100) * 1000000;
+		vblank_frame_us = vblank_timing->v_total * vblank_timing->h_total /
+				(double)(vblank_timing->pix_clk_100hz * 100) * 1000000;
+		vblank_blank_us = (vblank_timing->v_total - vblank_timing->v_addressable) * vblank_timing->h_total /
+				(double)(vblank_timing->pix_clk_100hz * 100) * 1000000;
+		subvp_active_us = main_timing->v_addressable * main_timing->h_total /
+				(double)(main_timing->pix_clk_100hz * 100) * 1000000;
+		max_vblank_mallregion = vblank_blank_us > mall_region_us ? vblank_blank_us : mall_region_us;
+
+		// Schedulable if VACTIVE region of the SubVP pipe can fit the MALL prefetch, VBLANK frame time,
+		// and the max of (VBLANK blanking time, MALL region)
+		// TODO: Possibly add some margin (i.e. the below conditions should be [...] > X instead of [...] > 0)
+		if (subvp_active_us - prefetch_us - vblank_frame_us - max_vblank_mallregion > 0)
+			schedulable = true;
+	}
+	return schedulable;
+}
+
+/**
+ * subvp_validate_static_schedulability: Check which SubVP case is calculated and handle
+ * static analysis based on the case.
+ *
+ * Three cases:
+ * 1. SubVP + SubVP
+ * 2. SubVP + VBLANK (DRR checked internally)
+ * 3. SubVP + VACTIVE (currently unsupported)
+ *
+ * @dc: current dc state
+ * @context: new dc state
+ * @vlevel: Voltage level calculated by DML
+ *
+ * Return:
+ * bool - True if statically schedulable, false otherwise
+ */
+static bool subvp_validate_static_schedulability(struct dc *dc,
+				struct dc_state *context,
+				int vlevel)
+{
+	bool schedulable = true;	// true by default for single display case
+	struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
+	uint32_t i, pipe_idx;
+	uint8_t subvp_count = 0;
+	uint8_t vactive_count = 0;
+
+	for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
+		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+		if (!pipe->stream)
+			continue;
+
+		if (pipe->plane_state && !pipe->top_pipe &&
+				pipe->stream->mall_stream_config.type == SUBVP_MAIN)
+			subvp_count++;
+
+		// Count how many planes that aren't SubVP/phantom are capable of VACTIVE
+		// switching (SubVP + VACTIVE unsupported). In situations where we force
+		// SubVP for a VACTIVE plane, we don't want to increment the vactive_count.
+		if (vba->ActiveDRAMClockChangeLatencyMargin[vba->pipe_plane[pipe_idx]] > 0 &&
+		    pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+			vactive_count++;
+		}
+		pipe_idx++;
+	}
+
+	if (subvp_count == 2) {
+		// Static schedulability check for SubVP + SubVP case
+		schedulable = subvp_subvp_schedulable(dc, context);
+	} else if (vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_vblank_w_mall_sub_vp) {
+		// Static schedulability check for SubVP + VBLANK case.
Also handle the case where + // DML outputs SubVP + VBLANK + VACTIVE (DML will report as SubVP + VBLANK) + if (vactive_count > 0) + schedulable = false; + else + schedulable = subvp_vblank_schedulable(dc, context); + } else if (vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_vactive_w_mall_sub_vp && + vactive_count > 0) { + // For single display SubVP cases, DML will output dm_dram_clock_change_vactive_w_mall_sub_vp by default. + // We tell the difference between SubVP vs. SubVP + VACTIVE by checking the vactive_count. + // SubVP + VACTIVE currently unsupported + schedulable = false; + } + return schedulable; +} + +void dcn32_full_validate_bw_helper(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int *vlevel, + int *split, + bool *merge, + int *pipe_cnt) +{ + struct vba_vars_st *vba = &context->bw_ctx.dml.vba; + unsigned int dc_pipe_idx = 0; + bool found_supported_config = false; + struct pipe_ctx *pipe = NULL; + uint32_t non_subvp_pipes = 0; + bool drr_pipe_found = false; + uint32_t drr_pipe_index = 0; + uint32_t i = 0; + + dc_assert_fp_enabled(); + + /* + * DML favors voltage over p-state, but we're more interested in + * supporting p-state over voltage. We can't support p-state in + * prefetch mode > 0 so try capping the prefetch mode to start. + */ + context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = + dm_prefetch_support_uclk_fclk_and_stutter; + *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt); + /* This may adjust vlevel and maxMpcComb */ + if (*vlevel < context->bw_ctx.dml.soc.num_states) + *vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, *vlevel, split, merge); + + /* Conditions for setting up phantom pipes for SubVP: + * 1. Not force disable SubVP + * 2. Full update (i.e. !fast_validate) + * 3. Enough pipes are available to support SubVP (TODO: Which pipes will use VACTIVE / VBLANK / SUBVP?) + * 4. 
Display configuration passes validation + * 5. (Config doesn't support MCLK in VACTIVE/VBLANK || dc->debug.force_subvp_mclk_switch) + */ + if (!dc->debug.force_disable_subvp && dcn32_all_pipes_have_stream_and_plane(dc, context) && + !dcn32_mpo_in_use(context) && (*vlevel == context->bw_ctx.dml.soc.num_states || + vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported || + dc->debug.force_subvp_mclk_switch)) { + + dcn32_merge_pipes_for_subvp(dc, context); + + while (!found_supported_config && dcn32_enough_pipes_for_subvp(dc, context) && + dcn32_assign_subvp_pipe(dc, context, &dc_pipe_idx)) { + /* For the case where *vlevel = num_states, bandwidth validation has failed for this config. + * Adding phantom pipes won't change the validation result, so change the DML input param + * for P-State support before adding phantom pipes and recalculating the DML result. + * However, this case is only applicable for SubVP + DRR cases because the prefetch mode + * will not allow for switch in VBLANK. The DRR display must have it's VBLANK stretched + * enough to support MCLK switching. + */ + if (*vlevel == context->bw_ctx.dml.soc.num_states) { + context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = + dm_prefetch_support_stutter; + /* There are params (such as FabricClock) that need to be recalculated + * after validation fails (otherwise it will be 0). Calculation for + * phantom vactive requires call into DML, so we must ensure all the + * vba params are valid otherwise we'll get incorrect phantom vactive. + */ + *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt); + } + + dc->res_pool->funcs->add_phantom_pipes(dc, context, pipes, *pipe_cnt, dc_pipe_idx); + + *pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, false); + // Populate dppclk to trigger a recalculate in dml_get_voltage_level + // so the phantom pipe DLG params can be assigned correctly. 
+ pipes[0].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, *pipe_cnt, 0); + *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt); + + if (*vlevel < context->bw_ctx.dml.soc.num_states && + vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] != dm_dram_clock_change_unsupported + && subvp_validate_static_schedulability(dc, context, *vlevel)) { + found_supported_config = true; + } else if (*vlevel < context->bw_ctx.dml.soc.num_states && + vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported) { + /* Case where 1 SubVP is added, and DML reports MCLK unsupported. This handles + * the case for SubVP + DRR, where the DRR display does not support MCLK switch + * at it's native refresh rate / timing. + */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe = &context->res_ctx.pipe_ctx[i]; + if (pipe->stream && pipe->plane_state && !pipe->top_pipe && + pipe->stream->mall_stream_config.type == SUBVP_NONE) { + non_subvp_pipes++; + // Use ignore_msa_timing_param flag to identify as DRR + if (pipe->stream->ignore_msa_timing_param) { + drr_pipe_found = true; + drr_pipe_index = i; + } + } + } + // If there is only 1 remaining non SubVP pipe that is DRR, check static + // schedulability for SubVP + DRR. 
+ if (non_subvp_pipes == 1 && drr_pipe_found) { + found_supported_config = subvp_drr_schedulable(dc, context, + &context->res_ctx.pipe_ctx[drr_pipe_index]); + } + } + } + + // If SubVP pipe config is unsupported (or cannot be used for UCLK switching) + // remove phantom pipes and repopulate dml pipes + if (!found_supported_config) { + dc->res_pool->funcs->remove_phantom_pipes(dc, context); + vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] = dm_dram_clock_change_unsupported; + *pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, false); + } else { + // only call dcn20_validate_apply_pipe_split_flags if we found a supported config + memset(split, 0, MAX_PIPES * sizeof(int)); + memset(merge, 0, MAX_PIPES * sizeof(bool)); + *vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, *vlevel, split, merge); + + // Most populate phantom DLG params before programming hardware / timing for phantom pipe + DC_FP_START(); + dcn32_helper_populate_phantom_dlg_params(dc, context, pipes, *pipe_cnt); + DC_FP_END(); + + // Note: We can't apply the phantom pipes to hardware at this time. We have to wait + // until driver has acquired the DMCUB lock to do it safely. 
+ } + } +} + diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h index 4abef908dca9..29fb6b1bc17f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h @@ -57,4 +57,12 @@ void dcn32_set_phantom_stream_timing(struct dc *dc, unsigned int pipe_cnt, unsigned int dc_pipe_idx); +void dcn32_full_validate_bw_helper(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int *vlevel, + int *split, + bool *merge, + int *pipe_cnt); + #endif -- cgit v1.2.3 From 0339530d8879cbd560cd3d3de5138dc797744274 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Fri, 8 Jul 2022 11:24:05 -0400 Subject: drm/amd/display: Move wm and dlg calculation to FPU code Move dcn32_calculate_wm_and_dlg from dcn32 resources to the FPU code. Additionally, this commit adds an interface to it. Tested-by: Daniel Wheeler Reviewed-by: Harry Wentland Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 196 +-------------------- .../gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 185 +++++++++++++++++++ .../gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h | 5 + 3 files changed, 195 insertions(+), 191 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index fb48293fa04e..3385b5c70c8c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -2280,187 +2280,6 @@ int dcn32_populate_dml_pipes_from_context( return pipe_cnt; } -void dcn32_calculate_wm_and_dlg_fp( - struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int pipe_cnt, - int vlevel) -{ - int i, pipe_idx, vlevel_temp = 0; - double dcfclk = dcn3_2_soc.clock_limits[0].dcfclk_mhz; - double dcfclk_from_validation = 
context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; - unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed; - bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] != - dm_dram_clock_change_unsupported; - - // Override DRAMClockChangeSupport for SubVP + DRR case where the DRR cannot switch without stretching it's VBLANK - if (!pstate_en && dcn32_subvp_in_use(dc, context)) { - context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] = dm_dram_clock_change_vblank_w_mall_sub_vp; - pstate_en = true; - } - - /* Set B: - * For Set B calculations use clocks from clock_limits[2] when available i.e. when SMU is present, - * otherwise use arbitrary low value from spreadsheet for DCFCLK as lower is safer for watermark - * calculations to cover bootup clocks. - * DCFCLK: soc.clock_limits[2] when available - * UCLK: soc.clock_limits[2] when available - */ - if (dcn3_2_soc.num_states > 2) { - vlevel_temp = 2; - dcfclk = dcn3_2_soc.clock_limits[2].dcfclk_mhz; - } else - dcfclk = 615; //DCFCLK Vmin_lv - - pipes[0].clks_cfg.voltage = vlevel_temp; - pipes[0].clks_cfg.dcfclk_mhz = dcfclk; - pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel_temp].socclk_mhz; - - if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid) { - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us; - context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.fclk_change_latency_us; - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us; - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us; - } - context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = 
get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.usr_retraining_ns = get_usr_retraining_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - - /* Set D: - * All clocks min. 
- * DCFCLK: Min, as reported by PM FW when available - * UCLK : Min, as reported by PM FW when available - * sr_enter_exit/sr_exit should be lower than used for DRAM (TBD after bringup or later, use as decided in Clk Mgr) - */ - - if (dcn3_2_soc.num_states > 2) { - vlevel_temp = 0; - dcfclk = dc->clk_mgr->bw_params->clk_table.entries[0].dcfclk_mhz; - } else - dcfclk = 615; //DCFCLK Vmin_lv - - pipes[0].clks_cfg.voltage = vlevel_temp; - pipes[0].clks_cfg.dcfclk_mhz = dcfclk; - pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel_temp].socclk_mhz; - - if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid) { - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us; - context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.fclk_change_latency_us; - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us; - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us; - } - context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - 
context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.usr_retraining_ns = get_usr_retraining_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - - /* Set C, for Dummy P-State: - * All clocks min. - * DCFCLK: Min, as reported by PM FW, when available - * UCLK : Min, as reported by PM FW, when available - * pstate latency as per UCLK state dummy pstate latency - */ - // For Set A and Set C use values from validation - pipes[0].clks_cfg.voltage = vlevel; - pipes[0].clks_cfg.dcfclk_mhz = dcfclk_from_validation; - pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz; - - if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) { - unsigned int min_dram_speed_mts_margin = 160; - - if ((!pstate_en)) - min_dram_speed_mts = dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz * 16; - - /* find largest table entry that is lower than dram speed, but lower than DPM0 still uses DPM0 */ - for (i = 3; i > 0; i--) - if (min_dram_speed_mts + min_dram_speed_mts_margin > dc->clk_mgr->bw_params->dummy_pstate_table[i].dram_speed_mts) - break; - - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[i].dummy_pstate_latency_us; - context->bw_ctx.dml.soc.dummy_pstate_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[i].dummy_pstate_latency_us; - context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.fclk_change_latency_us; - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = 
dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us; - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us; - } - context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.usr_retraining_ns = get_usr_retraining_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - - if ((!pstate_en) && (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid)) { - /* The only difference between A and C is p-state latency, if p-state is not supported - * with full p-state latency we want to calculate DLG based on dummy p-state latency, - * Set A p-state watermark set to 0 on DCN32, when p-state unsupported, for now keep as DCN32. 
- */ - context->bw_ctx.bw.dcn.watermarks.a = context->bw_ctx.bw.dcn.watermarks.c; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 0; - } else { - /* Set A: - * All clocks min. - * DCFCLK: Min, as reported by PM FW, when available - * UCLK: Min, as reported by PM FW, when available - */ - dc->res_pool->funcs->update_soc_for_wm_a(dc, context); - context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.usr_retraining_ns = get_usr_retraining_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - } - - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - if (!context->res_ctx.pipe_ctx[i].stream) - continue; - - pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt); - pipes[pipe_idx].clks_cfg.dppclk_mhz = 
get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); - - if (dc->config.forced_clocks) { - pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz; - pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz; - } - if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000) - pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0; - if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) - pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0; - - pipe_idx++; - } - - context->perf_params.stutter_period_us = context->bw_ctx.dml.vba.StutterPeriod; - - dcn32_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel); - - if (!pstate_en) - /* Restore full p-state latency */ - context->bw_ctx.dml.soc.dram_clock_change_latency_us = - dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; -} - static struct dc_cap_funcs cap_funcs = { .get_dcc_compression_cap = dcn20_get_dcc_compression_cap }; @@ -2488,18 +2307,13 @@ static void dcn32_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts, (dcn3_2_soc.return_bus_width_bytes * (dcn3_2_soc.max_avg_sdp_bw_use_normal_percent / 100)); } -void dcn32_calculate_wm_and_dlg( - struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int pipe_cnt, - int vlevel) +void dcn32_calculate_wm_and_dlg(struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel) { DC_FP_START(); - dcn32_calculate_wm_and_dlg_fp( - dc, context, - pipes, - pipe_cnt, - vlevel); + dcn32_calculate_wm_and_dlg_fpu(dc, context, pipes, pipe_cnt, vlevel); DC_FP_END(); } diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 82d801933aec..353d3a74e40b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -1058,3 +1058,188 @@ void dcn32_full_validate_bw_helper(struct dc *dc, } } +void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel) +{ + int i, pipe_idx, vlevel_temp = 0; + double dcfclk = dcn3_2_soc.clock_limits[0].dcfclk_mhz; + double dcfclk_from_validation = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; + bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] != + dm_dram_clock_change_unsupported; + + dc_assert_fp_enabled(); + + // Override DRAMClockChangeSupport for SubVP + DRR case where the DRR cannot switch without stretching its VBLANK + if (!pstate_en && dcn32_subvp_in_use(dc, context)) { + context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] = dm_dram_clock_change_vblank_w_mall_sub_vp; + pstate_en = true; + } + + /* Set B: + * For Set B calculations use clocks from clock_limits[2] when available i.e. when SMU is present, + * otherwise use arbitrary low value from spreadsheet for DCFCLK as lower is safer for watermark + * calculations to cover bootup clocks. 
+ * DCFCLK: soc.clock_limits[2] when available + * UCLK: soc.clock_limits[2] when available + */ + if (dcn3_2_soc.num_states > 2) { + vlevel_temp = 2; + dcfclk = dcn3_2_soc.clock_limits[2].dcfclk_mhz; + } else + dcfclk = 615; //DCFCLK Vmin_lv + + pipes[0].clks_cfg.voltage = vlevel_temp; + pipes[0].clks_cfg.dcfclk_mhz = dcfclk; + pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel_temp].socclk_mhz; + + if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid) { + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us; + context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.fclk_change_latency_us; + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us; + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us; + } + context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + 
context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.usr_retraining_ns = get_usr_retraining_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + + /* Set D: + * All clocks min. + * DCFCLK: Min, as reported by PM FW when available + * UCLK : Min, as reported by PM FW when available + * sr_enter_exit/sr_exit should be lower than used for DRAM (TBD after bringup or later, use as decided in Clk Mgr) + */ + + if (dcn3_2_soc.num_states > 2) { + vlevel_temp = 0; + dcfclk = dc->clk_mgr->bw_params->clk_table.entries[0].dcfclk_mhz; + } else + dcfclk = 615; //DCFCLK Vmin_lv + + pipes[0].clks_cfg.voltage = vlevel_temp; + pipes[0].clks_cfg.dcfclk_mhz = dcfclk; + pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel_temp].socclk_mhz; + + if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid) { + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us; + context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.fclk_change_latency_us; + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us; + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us; + } + context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, 
pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.usr_retraining_ns = get_usr_retraining_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + + /* Set C, for Dummy P-State: + * All clocks min. 
+ * DCFCLK: Min, as reported by PM FW, when available + * UCLK : Min, as reported by PM FW, when available + * pstate latency as per UCLK state dummy pstate latency + */ + + // For Set A and Set C use values from validation + pipes[0].clks_cfg.voltage = vlevel; + pipes[0].clks_cfg.dcfclk_mhz = dcfclk_from_validation; + pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz; + + if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) { + unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed; + unsigned int min_dram_speed_mts_margin = 160; + + if (context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] == + dm_dram_clock_change_unsupported) + min_dram_speed_mts = dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz * 16; + + /* find largest table entry that is lower than dram speed, but lower than DPM0 still uses DPM0 */ + for (i = 3; i > 0; i--) + if (min_dram_speed_mts + min_dram_speed_mts_margin > dc->clk_mgr->bw_params->dummy_pstate_table[i].dram_speed_mts) + break; + + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[i].dummy_pstate_latency_us; + context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.fclk_change_latency_us; + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us; + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us; + } + + context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = 
get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.usr_retraining_ns = get_usr_retraining_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + + if ((!pstate_en) && (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid)) { + /* The only difference between A and C is p-state latency, if p-state is not supported + * with full p-state latency we want to calculate DLG based on dummy p-state latency, + * Set A p-state watermark set to 0 on DCN30, when p-state unsupported, for now keep as DCN30. + */ + context->bw_ctx.bw.dcn.watermarks.a = context->bw_ctx.bw.dcn.watermarks.c; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 0; + } else { + /* Set A: + * All clocks min. 
+ * DCFCLK: Min, as reported by PM FW, when available + * UCLK: Min, as reported by PM FW, when available + */ + dc->res_pool->funcs->update_soc_for_wm_a(dc, context); + context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.usr_retraining_ns = get_usr_retraining_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + } + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + + pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt); + pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + + if (dc->config.forced_clocks) { + pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz; + 
pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz; + } + if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000) + pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0; + if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) + pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0; + + pipe_idx++; + } + + context->perf_params.stutter_period_us = context->bw_ctx.dml.vba.StutterPeriod; + + dcn32_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel); + + if (!pstate_en) + /* Restore full p-state latency */ + context->bw_ctx.dml.soc.dram_clock_change_latency_us = + dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; + +} + diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h index 29fb6b1bc17f..c7602f084be2 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h @@ -65,4 +65,9 @@ void dcn32_full_validate_bw_helper(struct dc *dc, bool *merge, int *pipe_cnt); +void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel); + #endif -- cgit v1.2.3 From 5b4ee98713c7b42e270835fea08c3b6977b48e7e Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Fri, 8 Jul 2022 11:50:22 -0400 Subject: drm/amd/display: Move dlg params calculation Move dlg params calculation to the FPU folder and make it static. 
Tested-by: Daniel Wheeler Reviewed-by: Harry Wentland Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 485 +------------------- .../gpu/drm/amd/display/dc/dcn32/dcn32_resource.h | 6 - .../gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 506 ++++++++++++++++++++- .../gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h | 13 +- 4 files changed, 513 insertions(+), 497 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index 3385b5c70c8c..0ca1d4c51baa 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -1752,368 +1752,6 @@ void dcn32_add_phantom_pipes(struct dc *dc, struct dc_state *context, } } -static bool dcn32_split_stream_for_mpc_or_odm( - const struct dc *dc, - struct resource_context *res_ctx, - struct pipe_ctx *pri_pipe, - struct pipe_ctx *sec_pipe, - bool odm) -{ - int pipe_idx = sec_pipe->pipe_idx; - const struct resource_pool *pool = dc->res_pool; - - if (pri_pipe->plane_state) { - /* ODM + window MPO, where MPO window is on left half only */ - if (pri_pipe->plane_state->clip_rect.x + pri_pipe->plane_state->clip_rect.width <= - pri_pipe->stream->src.x + pri_pipe->stream->src.width/2) - return true; - - /* ODM + window MPO, where MPO window is on right half only */ - if (pri_pipe->plane_state->clip_rect.x >= pri_pipe->stream->src.width/2) - return true; - } - - *sec_pipe = *pri_pipe; - - sec_pipe->pipe_idx = pipe_idx; - sec_pipe->plane_res.mi = pool->mis[pipe_idx]; - sec_pipe->plane_res.hubp = pool->hubps[pipe_idx]; - sec_pipe->plane_res.ipp = pool->ipps[pipe_idx]; - sec_pipe->plane_res.xfm = pool->transforms[pipe_idx]; - sec_pipe->plane_res.dpp = pool->dpps[pipe_idx]; - sec_pipe->plane_res.mpcc_inst = pool->dpps[pipe_idx]->inst; - sec_pipe->stream_res.dsc = NULL; - if (odm) { - if (pri_pipe->next_odm_pipe) { - ASSERT(pri_pipe->next_odm_pipe 
!= sec_pipe); - sec_pipe->next_odm_pipe = pri_pipe->next_odm_pipe; - sec_pipe->next_odm_pipe->prev_odm_pipe = sec_pipe; - } - if (pri_pipe->top_pipe && pri_pipe->top_pipe->next_odm_pipe) { - pri_pipe->top_pipe->next_odm_pipe->bottom_pipe = sec_pipe; - sec_pipe->top_pipe = pri_pipe->top_pipe->next_odm_pipe; - } - if (pri_pipe->bottom_pipe && pri_pipe->bottom_pipe->next_odm_pipe) { - pri_pipe->bottom_pipe->next_odm_pipe->top_pipe = sec_pipe; - sec_pipe->bottom_pipe = pri_pipe->bottom_pipe->next_odm_pipe; - } - pri_pipe->next_odm_pipe = sec_pipe; - sec_pipe->prev_odm_pipe = pri_pipe; - ASSERT(sec_pipe->top_pipe == NULL); - - if (!sec_pipe->top_pipe) - sec_pipe->stream_res.opp = pool->opps[pipe_idx]; - else - sec_pipe->stream_res.opp = sec_pipe->top_pipe->stream_res.opp; - if (sec_pipe->stream->timing.flags.DSC == 1) { - dcn20_acquire_dsc(dc, res_ctx, &sec_pipe->stream_res.dsc, pipe_idx); - ASSERT(sec_pipe->stream_res.dsc); - if (sec_pipe->stream_res.dsc == NULL) - return false; - } - } else { - if (pri_pipe->bottom_pipe) { - ASSERT(pri_pipe->bottom_pipe != sec_pipe); - sec_pipe->bottom_pipe = pri_pipe->bottom_pipe; - sec_pipe->bottom_pipe->top_pipe = sec_pipe; - } - pri_pipe->bottom_pipe = sec_pipe; - sec_pipe->top_pipe = pri_pipe; - - ASSERT(pri_pipe->plane_state); - } - - return true; -} - -static struct pipe_ctx *dcn32_find_split_pipe( - struct dc *dc, - struct dc_state *context, - int old_index) -{ - struct pipe_ctx *pipe = NULL; - int i; - - if (old_index >= 0 && context->res_ctx.pipe_ctx[old_index].stream == NULL) { - pipe = &context->res_ctx.pipe_ctx[old_index]; - pipe->pipe_idx = old_index; - } - - if (!pipe) - for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) { - if (dc->current_state->res_ctx.pipe_ctx[i].top_pipe == NULL - && dc->current_state->res_ctx.pipe_ctx[i].prev_odm_pipe == NULL) { - if (context->res_ctx.pipe_ctx[i].stream == NULL) { - pipe = &context->res_ctx.pipe_ctx[i]; - pipe->pipe_idx = i; - break; - } - } - } - - /* - * May need to fix pipes 
getting tossed from 1 opp to another on flip - * Add for debugging transient underflow during topology updates: - * ASSERT(pipe); - */ - if (!pipe) - for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) { - if (context->res_ctx.pipe_ctx[i].stream == NULL) { - pipe = &context->res_ctx.pipe_ctx[i]; - pipe->pipe_idx = i; - break; - } - } - - return pipe; -} - -static bool dcn32_internal_validate_bw( - struct dc *dc, - struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int *pipe_cnt_out, - int *vlevel_out, - bool fast_validate) -{ - bool out = false; - bool repopulate_pipes = false; - int split[MAX_PIPES] = { 0 }; - bool merge[MAX_PIPES] = { false }; - bool newly_split[MAX_PIPES] = { false }; - int pipe_cnt, i, pipe_idx, vlevel; - struct vba_vars_st *vba = &context->bw_ctx.dml.vba; - - ASSERT(pipes); - if (!pipes) - return false; - - // For each full update, remove all existing phantom pipes first - dc->res_pool->funcs->remove_phantom_pipes(dc, context); - - dc->res_pool->funcs->update_soc_for_wm_a(dc, context); - - pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate); - - if (!pipe_cnt) { - out = true; - goto validate_out; - } - - dml_log_pipe_params(&context->bw_ctx.dml, pipes, pipe_cnt); - - if (!fast_validate) { - DC_FP_START(); - dcn32_full_validate_bw_helper(dc, context, pipes, &vlevel, split, merge, &pipe_cnt); - DC_FP_END(); - } - - if (fast_validate || vlevel == context->bw_ctx.dml.soc.num_states || - vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported) { - /* - * If mode is unsupported or there's still no p-state support then - * fall back to favoring voltage. - * - * We don't actually support prefetch mode 2, so require that we - * at least support prefetch mode 1. 
- */ - context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = - dm_prefetch_support_stutter; - - vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); - if (vlevel < context->bw_ctx.dml.soc.num_states) { - memset(split, 0, MAX_PIPES * sizeof(int)); - memset(merge, 0, MAX_PIPES * sizeof(bool)); - vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge); - } - } - - dml_log_mode_support_params(&context->bw_ctx.dml); - - if (vlevel == context->bw_ctx.dml.soc.num_states) - goto validate_fail; - - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - struct pipe_ctx *mpo_pipe = pipe->bottom_pipe; - - if (!pipe->stream) - continue; - - /* We only support full screen mpo with ODM */ - if (vba->ODMCombineEnabled[vba->pipe_plane[pipe_idx]] != dm_odm_combine_mode_disabled - && pipe->plane_state && mpo_pipe - && memcmp(&mpo_pipe->plane_res.scl_data.recout, - &pipe->plane_res.scl_data.recout, - sizeof(struct rect)) != 0) { - ASSERT(mpo_pipe->plane_state != pipe->plane_state); - goto validate_fail; - } - pipe_idx++; - } - - /* merge pipes if necessary */ - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - /*skip pipes that don't need merging*/ - if (!merge[i]) - continue; - - /* if ODM merge we ignore mpc tree, mpo pipes will have their own flags */ - if (pipe->prev_odm_pipe) { - /*split off odm pipe*/ - pipe->prev_odm_pipe->next_odm_pipe = pipe->next_odm_pipe; - if (pipe->next_odm_pipe) - pipe->next_odm_pipe->prev_odm_pipe = pipe->prev_odm_pipe; - - pipe->bottom_pipe = NULL; - pipe->next_odm_pipe = NULL; - pipe->plane_state = NULL; - pipe->stream = NULL; - pipe->top_pipe = NULL; - pipe->prev_odm_pipe = NULL; - if (pipe->stream_res.dsc) - dcn20_release_dsc(&context->res_ctx, dc->res_pool, &pipe->stream_res.dsc); - memset(&pipe->plane_res, 0, sizeof(pipe->plane_res)); - memset(&pipe->stream_res, 
0, sizeof(pipe->stream_res)); - repopulate_pipes = true; - } else if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) { - struct pipe_ctx *top_pipe = pipe->top_pipe; - struct pipe_ctx *bottom_pipe = pipe->bottom_pipe; - - top_pipe->bottom_pipe = bottom_pipe; - if (bottom_pipe) - bottom_pipe->top_pipe = top_pipe; - - pipe->top_pipe = NULL; - pipe->bottom_pipe = NULL; - pipe->plane_state = NULL; - pipe->stream = NULL; - memset(&pipe->plane_res, 0, sizeof(pipe->plane_res)); - memset(&pipe->stream_res, 0, sizeof(pipe->stream_res)); - repopulate_pipes = true; - } else - ASSERT(0); /* Should never try to merge master pipe */ - - } - - for (i = 0, pipe_idx = -1; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; - struct pipe_ctx *hsplit_pipe = NULL; - bool odm; - int old_index = -1; - - if (!pipe->stream || newly_split[i]) - continue; - - pipe_idx++; - odm = vba->ODMCombineEnabled[vba->pipe_plane[pipe_idx]] != dm_odm_combine_mode_disabled; - - if (!pipe->plane_state && !odm) - continue; - - if (split[i]) { - if (odm) { - if (split[i] == 4 && old_pipe->next_odm_pipe && old_pipe->next_odm_pipe->next_odm_pipe) - old_index = old_pipe->next_odm_pipe->next_odm_pipe->pipe_idx; - else if (old_pipe->next_odm_pipe) - old_index = old_pipe->next_odm_pipe->pipe_idx; - } else { - if (split[i] == 4 && old_pipe->bottom_pipe && old_pipe->bottom_pipe->bottom_pipe && - old_pipe->bottom_pipe->bottom_pipe->plane_state == old_pipe->plane_state) - old_index = old_pipe->bottom_pipe->bottom_pipe->pipe_idx; - else if (old_pipe->bottom_pipe && - old_pipe->bottom_pipe->plane_state == old_pipe->plane_state) - old_index = old_pipe->bottom_pipe->pipe_idx; - } - hsplit_pipe = dcn32_find_split_pipe(dc, context, old_index); - ASSERT(hsplit_pipe); - if (!hsplit_pipe) - goto validate_fail; - - if (!dcn32_split_stream_for_mpc_or_odm( - dc, &context->res_ctx, - pipe, 
hsplit_pipe, odm)) - goto validate_fail; - - newly_split[hsplit_pipe->pipe_idx] = true; - repopulate_pipes = true; - } - if (split[i] == 4) { - struct pipe_ctx *pipe_4to1; - - if (odm && old_pipe->next_odm_pipe) - old_index = old_pipe->next_odm_pipe->pipe_idx; - else if (!odm && old_pipe->bottom_pipe && - old_pipe->bottom_pipe->plane_state == old_pipe->plane_state) - old_index = old_pipe->bottom_pipe->pipe_idx; - else - old_index = -1; - pipe_4to1 = dcn32_find_split_pipe(dc, context, old_index); - ASSERT(pipe_4to1); - if (!pipe_4to1) - goto validate_fail; - if (!dcn32_split_stream_for_mpc_or_odm( - dc, &context->res_ctx, - pipe, pipe_4to1, odm)) - goto validate_fail; - newly_split[pipe_4to1->pipe_idx] = true; - - if (odm && old_pipe->next_odm_pipe && old_pipe->next_odm_pipe->next_odm_pipe - && old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe) - old_index = old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe->pipe_idx; - else if (!odm && old_pipe->bottom_pipe && old_pipe->bottom_pipe->bottom_pipe && - old_pipe->bottom_pipe->bottom_pipe->bottom_pipe && - old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->plane_state == old_pipe->plane_state) - old_index = old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->pipe_idx; - else - old_index = -1; - pipe_4to1 = dcn32_find_split_pipe(dc, context, old_index); - ASSERT(pipe_4to1); - if (!pipe_4to1) - goto validate_fail; - if (!dcn32_split_stream_for_mpc_or_odm( - dc, &context->res_ctx, - hsplit_pipe, pipe_4to1, odm)) - goto validate_fail; - newly_split[pipe_4to1->pipe_idx] = true; - } - if (odm) - dcn20_build_mapped_resource(dc, context, pipe->stream); - } - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (pipe->plane_state) { - if (!resource_build_scaling_params(pipe)) - goto validate_fail; - } - } - - /* Actual dsc count per stream dsc validation*/ - if (!dcn20_validate_dsc(dc, context)) { - vba->ValidationStatus[vba->soc.num_states] = 
DML_FAIL_DSC_VALIDATION_FAILURE; - goto validate_fail; - } - - if (repopulate_pipes) - pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate); - *vlevel_out = vlevel; - *pipe_cnt_out = pipe_cnt; - - out = true; - goto validate_out; - -validate_fail: - out = false; - -validate_out: - return out; -} - bool dcn32_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate) @@ -2129,9 +1767,9 @@ bool dcn32_validate_bandwidth(struct dc *dc, BW_VAL_TRACE_COUNT(); - DC_FP_START(); + DC_FP_START(); out = dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate); - DC_FP_END(); + DC_FP_END(); if (pipe_cnt == 0) goto validate_out; @@ -2317,125 +1955,6 @@ void dcn32_calculate_wm_and_dlg(struct dc *dc, struct dc_state *context, DC_FP_END(); } -static bool is_dtbclk_required(struct dc *dc, struct dc_state *context) -{ - int i; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - if (!context->res_ctx.pipe_ctx[i].stream) - continue; - if (is_dp_128b_132b_signal(&context->res_ctx.pipe_ctx[i])) - return true; - } - return false; -} - -void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, - int pipe_cnt, int vlevel) -{ - int i, pipe_idx; - bool usr_retraining_support = false; - bool unbounded_req_enabled = false; - - /* Writeback MCIF_WB arbitration parameters */ - dc->res_pool->funcs->set_mcif_arb_params(dc, context, pipes, pipe_cnt); - - context->bw_ctx.bw.dcn.clk.dispclk_khz = context->bw_ctx.dml.vba.DISPCLK * 1000; - context->bw_ctx.bw.dcn.clk.dcfclk_khz = context->bw_ctx.dml.vba.DCFCLK * 1000; - context->bw_ctx.bw.dcn.clk.socclk_khz = context->bw_ctx.dml.vba.SOCCLK * 1000; - context->bw_ctx.bw.dcn.clk.dramclk_khz = context->bw_ctx.dml.vba.DRAMSpeed * 1000 / 16; - context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = context->bw_ctx.dml.vba.DCFCLKDeepSleep * 1000; - context->bw_ctx.bw.dcn.clk.fclk_khz = context->bw_ctx.dml.vba.FabricClock * 1000; - 
context->bw_ctx.bw.dcn.clk.p_state_change_support = - context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] - != dm_dram_clock_change_unsupported; - context->bw_ctx.bw.dcn.clk.num_ways = dcn32_helper_calculate_num_ways_for_subvp(dc, context); - /* - * - * TODO: needs FAMS - * Pstate change might not be supported by hardware, but it might be - * possible with firmware driven vertical blank stretching. - */ - // context->bw_ctx.bw.dcn.clk.p_state_change_support |= context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching; - context->bw_ctx.bw.dcn.clk.dppclk_khz = 0; - context->bw_ctx.bw.dcn.clk.dtbclk_en = is_dtbclk_required(dc, context); - context->bw_ctx.bw.dcn.clk.ref_dtbclk_khz = context->bw_ctx.dml.vba.DTBCLKPerState[vlevel] * 1000; - if (context->bw_ctx.dml.vba.FCLKChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] == dm_fclock_change_unsupported) - context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = false; - else - context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = true; - - usr_retraining_support = context->bw_ctx.dml.vba.USRRetrainingSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; - ASSERT(usr_retraining_support); - - if (context->bw_ctx.bw.dcn.clk.dispclk_khz < dc->debug.min_disp_clk_khz) - context->bw_ctx.bw.dcn.clk.dispclk_khz = dc->debug.min_disp_clk_khz; - - unbounded_req_enabled = get_unbounded_request_enabled(&context->bw_ctx.dml, pipes, pipe_cnt); - - if (unbounded_req_enabled && pipe_cnt > 1) { - // Unbounded requesting should not ever be used when more than 1 pipe is enabled. 
- ASSERT(false); - unbounded_req_enabled = false; - } - - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - if (!context->res_ctx.pipe_ctx[i].stream) - continue; - pipes[pipe_idx].pipe.dest.vstartup_start = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, - pipe_idx); - pipes[pipe_idx].pipe.dest.vupdate_offset = get_vupdate_offset(&context->bw_ctx.dml, pipes, pipe_cnt, - pipe_idx); - pipes[pipe_idx].pipe.dest.vupdate_width = get_vupdate_width(&context->bw_ctx.dml, pipes, pipe_cnt, - pipe_idx); - pipes[pipe_idx].pipe.dest.vready_offset = get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt, - pipe_idx); - if (context->res_ctx.pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM) { - // Phantom pipe requires that DET_SIZE = 0 and no unbounded requests - context->res_ctx.pipe_ctx[i].det_buffer_size_kb = 0; - context->res_ctx.pipe_ctx[i].unbounded_req = false; - } else { - context->res_ctx.pipe_ctx[i].det_buffer_size_kb = get_det_buffer_size_kbytes(&context->bw_ctx.dml, pipes, pipe_cnt, - pipe_idx); - context->res_ctx.pipe_ctx[i].unbounded_req = unbounded_req_enabled; - } - if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) - context->bw_ctx.bw.dcn.clk.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000; - context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000; - context->res_ctx.pipe_ctx[i].pipe_dlg_param = pipes[pipe_idx].pipe.dest; - pipe_idx++; - } - /*save a original dppclock copy*/ - context->bw_ctx.bw.dcn.clk.bw_dppclk_khz = context->bw_ctx.bw.dcn.clk.dppclk_khz; - context->bw_ctx.bw.dcn.clk.bw_dispclk_khz = context->bw_ctx.bw.dcn.clk.dispclk_khz; - context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz = context->bw_ctx.dml.soc.clock_limits[vlevel].dppclk_mhz - * 1000; - context->bw_ctx.bw.dcn.clk.max_supported_dispclk_khz = context->bw_ctx.dml.soc.clock_limits[vlevel].dispclk_mhz - * 1000; - - context->bw_ctx.bw.dcn.compbuf_size_kb = 
context->bw_ctx.dml.ip.config_return_buffer_size_in_kbytes; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - if (context->res_ctx.pipe_ctx[i].stream) - context->bw_ctx.bw.dcn.compbuf_size_kb -= context->res_ctx.pipe_ctx[i].det_buffer_size_kb; - } - - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - - if (!context->res_ctx.pipe_ctx[i].stream) - continue; - - context->bw_ctx.dml.funcs.rq_dlg_get_dlg_reg_v2(&context->bw_ctx.dml, - &context->res_ctx.pipe_ctx[i].dlg_regs, &context->res_ctx.pipe_ctx[i].ttu_regs, pipes, - pipe_cnt, pipe_idx); - - context->bw_ctx.dml.funcs.rq_dlg_get_rq_reg_v2(&context->res_ctx.pipe_ctx[i].rq_regs, - &context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); - - pipe_idx++; - } -} - static void get_optimal_ntuple(struct _vcs_dpi_voltage_scaling_st *entry) { if (entry->dcfclk_mhz > 0) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h index 37d37067e983..fc0fe48023a0 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h @@ -44,12 +44,6 @@ struct resource_pool *dcn32_create_resource_pool( const struct dc_init_data *init_data, struct dc *dc); -void dcn32_calculate_dlg_params( - struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int pipe_cnt, - int vlevel); - struct panel_cntl *dcn32_panel_cntl_create( const struct panel_cntl_init_data *init_data); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 353d3a74e40b..66102db87265 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -24,12 +24,15 @@ * */ #include "dcn32_fpu.h" +#include "dc_link_dp.h" #include "dcn32/dcn32_resource.h" #include "dcn20/dcn20_resource.h" #include "display_mode_vba_util_32.h" // We need this includes for WATERMARKS_* defines #include 
"clk_mgr/dcn32/dcn32_smu13_driver_if.h" +#define DC_LOGGER_INIT(logger) + struct _vcs_dpi_ip_params_st dcn3_2_ip = { .gpuvm_enable = 0, .gpuvm_max_page_table_levels = 4, @@ -931,7 +934,7 @@ static bool subvp_validate_static_schedulability(struct dc *dc, return schedulable; } -void dcn32_full_validate_bw_helper(struct dc *dc, +static void dcn32_full_validate_bw_helper(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, int *vlevel, @@ -1058,6 +1061,507 @@ void dcn32_full_validate_bw_helper(struct dc *dc, } } +static bool is_dtbclk_required(struct dc *dc, struct dc_state *context) +{ + int i; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + if (is_dp_128b_132b_signal(&context->res_ctx.pipe_ctx[i])) + return true; + } + return false; +} + +static void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, int vlevel) +{ + int i, pipe_idx; + bool usr_retraining_support = false; + bool unbounded_req_enabled = false; + + dc_assert_fp_enabled(); + + /* Writeback MCIF_WB arbitration parameters */ + dc->res_pool->funcs->set_mcif_arb_params(dc, context, pipes, pipe_cnt); + + context->bw_ctx.bw.dcn.clk.dispclk_khz = context->bw_ctx.dml.vba.DISPCLK * 1000; + context->bw_ctx.bw.dcn.clk.dcfclk_khz = context->bw_ctx.dml.vba.DCFCLK * 1000; + context->bw_ctx.bw.dcn.clk.socclk_khz = context->bw_ctx.dml.vba.SOCCLK * 1000; + context->bw_ctx.bw.dcn.clk.dramclk_khz = context->bw_ctx.dml.vba.DRAMSpeed * 1000 / 16; + context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = context->bw_ctx.dml.vba.DCFCLKDeepSleep * 1000; + context->bw_ctx.bw.dcn.clk.fclk_khz = context->bw_ctx.dml.vba.FabricClock * 1000; + context->bw_ctx.bw.dcn.clk.p_state_change_support = + context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] + != dm_dram_clock_change_unsupported; + context->bw_ctx.bw.dcn.clk.num_ways = 
dcn32_helper_calculate_num_ways_for_subvp(dc, context); + + context->bw_ctx.bw.dcn.clk.dppclk_khz = 0; + context->bw_ctx.bw.dcn.clk.dtbclk_en = is_dtbclk_required(dc, context); + context->bw_ctx.bw.dcn.clk.ref_dtbclk_khz = context->bw_ctx.dml.vba.DTBCLKPerState[vlevel] * 1000; + if (context->bw_ctx.dml.vba.FCLKChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] == dm_fclock_change_unsupported) + context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = false; + else + context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = true; + + usr_retraining_support = context->bw_ctx.dml.vba.USRRetrainingSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; + ASSERT(usr_retraining_support); + + if (context->bw_ctx.bw.dcn.clk.dispclk_khz < dc->debug.min_disp_clk_khz) + context->bw_ctx.bw.dcn.clk.dispclk_khz = dc->debug.min_disp_clk_khz; + + unbounded_req_enabled = get_unbounded_request_enabled(&context->bw_ctx.dml, pipes, pipe_cnt); + + if (unbounded_req_enabled && pipe_cnt > 1) { + // Unbounded requesting should not ever be used when more than 1 pipe is enabled. 
+ ASSERT(false); + unbounded_req_enabled = false; + } + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + pipes[pipe_idx].pipe.dest.vstartup_start = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, + pipe_idx); + pipes[pipe_idx].pipe.dest.vupdate_offset = get_vupdate_offset(&context->bw_ctx.dml, pipes, pipe_cnt, + pipe_idx); + pipes[pipe_idx].pipe.dest.vupdate_width = get_vupdate_width(&context->bw_ctx.dml, pipes, pipe_cnt, + pipe_idx); + pipes[pipe_idx].pipe.dest.vready_offset = get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt, + pipe_idx); + + if (context->res_ctx.pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM) { + // Phantom pipe requires that DET_SIZE = 0 and no unbounded requests + context->res_ctx.pipe_ctx[i].det_buffer_size_kb = 0; + context->res_ctx.pipe_ctx[i].unbounded_req = false; + } else { + context->res_ctx.pipe_ctx[i].det_buffer_size_kb = get_det_buffer_size_kbytes(&context->bw_ctx.dml, pipes, pipe_cnt, + pipe_idx); + context->res_ctx.pipe_ctx[i].unbounded_req = unbounded_req_enabled; + } + + if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) + context->bw_ctx.bw.dcn.clk.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000; + context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000; + context->res_ctx.pipe_ctx[i].pipe_dlg_param = pipes[pipe_idx].pipe.dest; + pipe_idx++; + } + /*save a original dppclock copy*/ + context->bw_ctx.bw.dcn.clk.bw_dppclk_khz = context->bw_ctx.bw.dcn.clk.dppclk_khz; + context->bw_ctx.bw.dcn.clk.bw_dispclk_khz = context->bw_ctx.bw.dcn.clk.dispclk_khz; + context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz = context->bw_ctx.dml.soc.clock_limits[vlevel].dppclk_mhz + * 1000; + context->bw_ctx.bw.dcn.clk.max_supported_dispclk_khz = context->bw_ctx.dml.soc.clock_limits[vlevel].dispclk_mhz + * 1000; + + context->bw_ctx.bw.dcn.compbuf_size_kb = 
context->bw_ctx.dml.ip.config_return_buffer_size_in_kbytes; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (context->res_ctx.pipe_ctx[i].stream) + context->bw_ctx.bw.dcn.compbuf_size_kb -= context->res_ctx.pipe_ctx[i].det_buffer_size_kb; + } + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + + context->bw_ctx.dml.funcs.rq_dlg_get_dlg_reg_v2(&context->bw_ctx.dml, + &context->res_ctx.pipe_ctx[i].dlg_regs, &context->res_ctx.pipe_ctx[i].ttu_regs, pipes, + pipe_cnt, pipe_idx); + + context->bw_ctx.dml.funcs.rq_dlg_get_rq_reg_v2(&context->res_ctx.pipe_ctx[i].rq_regs, + &context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + pipe_idx++; + } +} + +static struct pipe_ctx *dcn32_find_split_pipe( + struct dc *dc, + struct dc_state *context, + int old_index) +{ + struct pipe_ctx *pipe = NULL; + int i; + + if (old_index >= 0 && context->res_ctx.pipe_ctx[old_index].stream == NULL) { + pipe = &context->res_ctx.pipe_ctx[old_index]; + pipe->pipe_idx = old_index; + } + + if (!pipe) + for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) { + if (dc->current_state->res_ctx.pipe_ctx[i].top_pipe == NULL + && dc->current_state->res_ctx.pipe_ctx[i].prev_odm_pipe == NULL) { + if (context->res_ctx.pipe_ctx[i].stream == NULL) { + pipe = &context->res_ctx.pipe_ctx[i]; + pipe->pipe_idx = i; + break; + } + } + } + + /* + * May need to fix pipes getting tossed from 1 opp to another on flip + * Add for debugging transient underflow during topology updates: + * ASSERT(pipe); + */ + if (!pipe) + for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) { + if (context->res_ctx.pipe_ctx[i].stream == NULL) { + pipe = &context->res_ctx.pipe_ctx[i]; + pipe->pipe_idx = i; + break; + } + } + + return pipe; +} + +static bool dcn32_split_stream_for_mpc_or_odm( + const struct dc *dc, + struct resource_context *res_ctx, + struct pipe_ctx *pri_pipe, + struct pipe_ctx *sec_pipe, + bool odm) +{ + int pipe_idx = sec_pipe->pipe_idx; + const 
struct resource_pool *pool = dc->res_pool; + + DC_LOGGER_INIT(dc->ctx->logger); + + if (odm && pri_pipe->plane_state) { + /* ODM + window MPO, where MPO window is on left half only */ + if (pri_pipe->plane_state->clip_rect.x + pri_pipe->plane_state->clip_rect.width <= + pri_pipe->stream->src.x + pri_pipe->stream->src.width/2) { + + DC_LOG_SCALER("%s - ODM + window MPO(left). pri_pipe:%d\n", + __func__, + pri_pipe->pipe_idx); + return true; + } + + /* ODM + window MPO, where MPO window is on right half only */ + if (pri_pipe->plane_state->clip_rect.x >= pri_pipe->stream->src.x + pri_pipe->stream->src.width/2) { + + DC_LOG_SCALER("%s - ODM + window MPO(right). pri_pipe:%d\n", + __func__, + pri_pipe->pipe_idx); + return true; + } + } + + *sec_pipe = *pri_pipe; + + sec_pipe->pipe_idx = pipe_idx; + sec_pipe->plane_res.mi = pool->mis[pipe_idx]; + sec_pipe->plane_res.hubp = pool->hubps[pipe_idx]; + sec_pipe->plane_res.ipp = pool->ipps[pipe_idx]; + sec_pipe->plane_res.xfm = pool->transforms[pipe_idx]; + sec_pipe->plane_res.dpp = pool->dpps[pipe_idx]; + sec_pipe->plane_res.mpcc_inst = pool->dpps[pipe_idx]->inst; + sec_pipe->stream_res.dsc = NULL; + if (odm) { + if (pri_pipe->next_odm_pipe) { + ASSERT(pri_pipe->next_odm_pipe != sec_pipe); + sec_pipe->next_odm_pipe = pri_pipe->next_odm_pipe; + sec_pipe->next_odm_pipe->prev_odm_pipe = sec_pipe; + } + if (pri_pipe->top_pipe && pri_pipe->top_pipe->next_odm_pipe) { + pri_pipe->top_pipe->next_odm_pipe->bottom_pipe = sec_pipe; + sec_pipe->top_pipe = pri_pipe->top_pipe->next_odm_pipe; + } + if (pri_pipe->bottom_pipe && pri_pipe->bottom_pipe->next_odm_pipe) { + pri_pipe->bottom_pipe->next_odm_pipe->top_pipe = sec_pipe; + sec_pipe->bottom_pipe = pri_pipe->bottom_pipe->next_odm_pipe; + } + pri_pipe->next_odm_pipe = sec_pipe; + sec_pipe->prev_odm_pipe = pri_pipe; + ASSERT(sec_pipe->top_pipe == NULL); + + if (!sec_pipe->top_pipe) + sec_pipe->stream_res.opp = pool->opps[pipe_idx]; + else + sec_pipe->stream_res.opp = 
sec_pipe->top_pipe->stream_res.opp; + if (sec_pipe->stream->timing.flags.DSC == 1) { + dcn20_acquire_dsc(dc, res_ctx, &sec_pipe->stream_res.dsc, pipe_idx); + ASSERT(sec_pipe->stream_res.dsc); + if (sec_pipe->stream_res.dsc == NULL) + return false; + } + } else { + if (pri_pipe->bottom_pipe) { + ASSERT(pri_pipe->bottom_pipe != sec_pipe); + sec_pipe->bottom_pipe = pri_pipe->bottom_pipe; + sec_pipe->bottom_pipe->top_pipe = sec_pipe; + } + pri_pipe->bottom_pipe = sec_pipe; + sec_pipe->top_pipe = pri_pipe; + + ASSERT(pri_pipe->plane_state); + } + + return true; +} + +bool dcn32_internal_validate_bw(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int *pipe_cnt_out, + int *vlevel_out, + bool fast_validate) +{ + bool out = false; + bool repopulate_pipes = false; + int split[MAX_PIPES] = { 0 }; + bool merge[MAX_PIPES] = { false }; + bool newly_split[MAX_PIPES] = { false }; + int pipe_cnt, i, pipe_idx, vlevel; + struct vba_vars_st *vba = &context->bw_ctx.dml.vba; + + dc_assert_fp_enabled(); + + ASSERT(pipes); + if (!pipes) + return false; + + // For each full update, remove all existing phantom pipes first + dc->res_pool->funcs->remove_phantom_pipes(dc, context); + + dc->res_pool->funcs->update_soc_for_wm_a(dc, context); + + pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate); + + if (!pipe_cnt) { + out = true; + goto validate_out; + } + + dml_log_pipe_params(&context->bw_ctx.dml, pipes, pipe_cnt); + + if (!fast_validate) { + DC_FP_START(); + dcn32_full_validate_bw_helper(dc, context, pipes, &vlevel, split, merge, &pipe_cnt); + DC_FP_END(); + } + + if (fast_validate || vlevel == context->bw_ctx.dml.soc.num_states || + vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported) { + /* + * If mode is unsupported or there's still no p-state support then + * fall back to favoring voltage. 
+ * + * If Prefetch mode 0 failed for this config, or passed with Max UCLK, try if + * supported with Prefetch mode 1 (dm_prefetch_support_fclk_and_stutter == 2) + */ + context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = + dm_prefetch_support_fclk_and_stutter; + + vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); + + /* Last attempt with Prefetch mode 2 (dm_prefetch_support_stutter == 3) */ + if (vlevel == context->bw_ctx.dml.soc.num_states) { + context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = + dm_prefetch_support_stutter; + vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); + } + + if (vlevel < context->bw_ctx.dml.soc.num_states) { + memset(split, 0, sizeof(split)); + memset(merge, 0, sizeof(merge)); + vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge); + } + } + + dml_log_mode_support_params(&context->bw_ctx.dml); + + if (vlevel == context->bw_ctx.dml.soc.num_states) + goto validate_fail; + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + struct pipe_ctx *mpo_pipe = pipe->bottom_pipe; + + if (!pipe->stream) + continue; + + if (vba->ODMCombineEnabled[vba->pipe_plane[pipe_idx]] != dm_odm_combine_mode_disabled + && !dc->config.enable_windowed_mpo_odm + && pipe->plane_state && mpo_pipe + && memcmp(&mpo_pipe->plane_res.scl_data.recout, + &pipe->plane_res.scl_data.recout, + sizeof(struct rect)) != 0) { + ASSERT(mpo_pipe->plane_state != pipe->plane_state); + goto validate_fail; + } + pipe_idx++; + } + + /* merge pipes if necessary */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + /*skip pipes that don't need merging*/ + if (!merge[i]) + continue; + + /* if ODM merge we ignore mpc tree, mpo pipes will have their own flags */ + if (pipe->prev_odm_pipe) { + /*split off odm pipe*/ + pipe->prev_odm_pipe->next_odm_pipe = 
pipe->next_odm_pipe; + if (pipe->next_odm_pipe) + pipe->next_odm_pipe->prev_odm_pipe = pipe->prev_odm_pipe; + + pipe->bottom_pipe = NULL; + pipe->next_odm_pipe = NULL; + pipe->plane_state = NULL; + pipe->stream = NULL; + pipe->top_pipe = NULL; + pipe->prev_odm_pipe = NULL; + if (pipe->stream_res.dsc) + dcn20_release_dsc(&context->res_ctx, dc->res_pool, &pipe->stream_res.dsc); + memset(&pipe->plane_res, 0, sizeof(pipe->plane_res)); + memset(&pipe->stream_res, 0, sizeof(pipe->stream_res)); + repopulate_pipes = true; + } else if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) { + struct pipe_ctx *top_pipe = pipe->top_pipe; + struct pipe_ctx *bottom_pipe = pipe->bottom_pipe; + + top_pipe->bottom_pipe = bottom_pipe; + if (bottom_pipe) + bottom_pipe->top_pipe = top_pipe; + + pipe->top_pipe = NULL; + pipe->bottom_pipe = NULL; + pipe->plane_state = NULL; + pipe->stream = NULL; + memset(&pipe->plane_res, 0, sizeof(pipe->plane_res)); + memset(&pipe->stream_res, 0, sizeof(pipe->stream_res)); + repopulate_pipes = true; + } else + ASSERT(0); /* Should never try to merge master pipe */ + + } + + for (i = 0, pipe_idx = -1; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + struct pipe_ctx *hsplit_pipe = NULL; + bool odm; + int old_index = -1; + + if (!pipe->stream || newly_split[i]) + continue; + + pipe_idx++; + odm = vba->ODMCombineEnabled[vba->pipe_plane[pipe_idx]] != dm_odm_combine_mode_disabled; + + if (!pipe->plane_state && !odm) + continue; + + if (split[i]) { + if (odm) { + if (split[i] == 4 && old_pipe->next_odm_pipe && old_pipe->next_odm_pipe->next_odm_pipe) + old_index = old_pipe->next_odm_pipe->next_odm_pipe->pipe_idx; + else if (old_pipe->next_odm_pipe) + old_index = old_pipe->next_odm_pipe->pipe_idx; + } else { + if (split[i] == 4 && old_pipe->bottom_pipe && old_pipe->bottom_pipe->bottom_pipe && + 
old_pipe->bottom_pipe->bottom_pipe->plane_state == old_pipe->plane_state) + old_index = old_pipe->bottom_pipe->bottom_pipe->pipe_idx; + else if (old_pipe->bottom_pipe && + old_pipe->bottom_pipe->plane_state == old_pipe->plane_state) + old_index = old_pipe->bottom_pipe->pipe_idx; + } + hsplit_pipe = dcn32_find_split_pipe(dc, context, old_index); + ASSERT(hsplit_pipe); + if (!hsplit_pipe) + goto validate_fail; + + if (!dcn32_split_stream_for_mpc_or_odm( + dc, &context->res_ctx, + pipe, hsplit_pipe, odm)) + goto validate_fail; + + newly_split[hsplit_pipe->pipe_idx] = true; + repopulate_pipes = true; + } + if (split[i] == 4) { + struct pipe_ctx *pipe_4to1; + + if (odm && old_pipe->next_odm_pipe) + old_index = old_pipe->next_odm_pipe->pipe_idx; + else if (!odm && old_pipe->bottom_pipe && + old_pipe->bottom_pipe->plane_state == old_pipe->plane_state) + old_index = old_pipe->bottom_pipe->pipe_idx; + else + old_index = -1; + pipe_4to1 = dcn32_find_split_pipe(dc, context, old_index); + ASSERT(pipe_4to1); + if (!pipe_4to1) + goto validate_fail; + if (!dcn32_split_stream_for_mpc_or_odm( + dc, &context->res_ctx, + pipe, pipe_4to1, odm)) + goto validate_fail; + newly_split[pipe_4to1->pipe_idx] = true; + + if (odm && old_pipe->next_odm_pipe && old_pipe->next_odm_pipe->next_odm_pipe + && old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe) + old_index = old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe->pipe_idx; + else if (!odm && old_pipe->bottom_pipe && old_pipe->bottom_pipe->bottom_pipe && + old_pipe->bottom_pipe->bottom_pipe->bottom_pipe && + old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->plane_state == old_pipe->plane_state) + old_index = old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->pipe_idx; + else + old_index = -1; + pipe_4to1 = dcn32_find_split_pipe(dc, context, old_index); + ASSERT(pipe_4to1); + if (!pipe_4to1) + goto validate_fail; + if (!dcn32_split_stream_for_mpc_or_odm( + dc, &context->res_ctx, + hsplit_pipe, pipe_4to1, odm)) + goto validate_fail; + 
newly_split[pipe_4to1->pipe_idx] = true; + } + if (odm) + dcn20_build_mapped_resource(dc, context, pipe->stream); + } + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (pipe->plane_state) { + if (!resource_build_scaling_params(pipe)) + goto validate_fail; + } + } + + /* Actual dsc count per stream dsc validation*/ + if (!dcn20_validate_dsc(dc, context)) { + vba->ValidationStatus[vba->soc.num_states] = DML_FAIL_DSC_VALIDATION_FAILURE; + goto validate_fail; + } + + if (repopulate_pipes) + pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate); + *vlevel_out = vlevel; + *pipe_cnt_out = pipe_cnt; + + out = true; + goto validate_out; + +validate_fail: + out = false; + +validate_out: + return out; +} + + void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, int pipe_cnt, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h index c7602f084be2..56973debc348 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h @@ -57,13 +57,12 @@ void dcn32_set_phantom_stream_timing(struct dc *dc, unsigned int pipe_cnt, unsigned int dc_pipe_idx); -void dcn32_full_validate_bw_helper(struct dc *dc, - struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int *vlevel, - int *split, - bool *merge, - int *pipe_cnt); +bool dcn32_internal_validate_bw(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int *pipe_cnt_out, + int *vlevel_out, + bool fast_validate); void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, -- cgit v1.2.3 From a4f8f294fe99a678ce0a50d649732440c41742b7 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Fri, 8 Jul 2022 11:55:39 -0400 Subject: drm/amd/display: Move ntuple to insert entry 
Move get_optimal_ntuple to the FPU code and call it inside insert_entry_into_table_sorted. Tested-by: Daniel Wheeler Reviewed-by: Harry Wentland Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 28 ---------------------- .../gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 25 +++++++++++++++++++ 2 files changed, 25 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index 0ca1d4c51baa..db4008016911 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -1955,29 +1955,6 @@ void dcn32_calculate_wm_and_dlg(struct dc *dc, struct dc_state *context, DC_FP_END(); } -static void get_optimal_ntuple(struct _vcs_dpi_voltage_scaling_st *entry) -{ - if (entry->dcfclk_mhz > 0) { - float bw_on_sdp = entry->dcfclk_mhz * dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100); - - entry->fabricclk_mhz = bw_on_sdp / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100)); - entry->dram_speed_mts = bw_on_sdp / (dcn3_2_soc.num_chans * - dcn3_2_soc.dram_channel_width_bytes * ((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100)); - } else if (entry->fabricclk_mhz > 0) { - float bw_on_fabric = entry->fabricclk_mhz * dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100); - - entry->dcfclk_mhz = bw_on_fabric / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100)); - entry->dram_speed_mts = bw_on_fabric / (dcn3_2_soc.num_chans * - dcn3_2_soc.dram_channel_width_bytes * ((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100)); - } else if (entry->dram_speed_mts > 0) { - float bw_on_dram = entry->dram_speed_mts * dcn3_2_soc.num_chans * - 
dcn3_2_soc.dram_channel_width_bytes * ((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100); - - entry->fabricclk_mhz = bw_on_dram / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100)); - entry->dcfclk_mhz = bw_on_dram / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100)); - } -} - static void remove_entry_from_table_at_index(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries, unsigned int index) { @@ -2061,7 +2038,6 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, entry.fabricclk_mhz = 0; entry.dram_speed_mts = 0; - get_optimal_ntuple(&entry); DC_FP_START(); insert_entry_into_table_sorted(table, num_entries, &entry); DC_FP_END(); @@ -2072,7 +2048,6 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, entry.fabricclk_mhz = 0; entry.dram_speed_mts = 0; - get_optimal_ntuple(&entry); DC_FP_START(); insert_entry_into_table_sorted(table, num_entries, &entry); DC_FP_END(); @@ -2083,7 +2058,6 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, entry.fabricclk_mhz = 0; entry.dram_speed_mts = bw_params->clk_table.entries[i].memclk_mhz * 16; - get_optimal_ntuple(&entry); DC_FP_START(); insert_entry_into_table_sorted(table, num_entries, &entry); DC_FP_END(); @@ -2096,7 +2070,6 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, entry.fabricclk_mhz = bw_params->clk_table.entries[i].fclk_mhz; entry.dram_speed_mts = 0; - get_optimal_ntuple(&entry); DC_FP_START(); insert_entry_into_table_sorted(table, num_entries, &entry); DC_FP_END(); @@ -2108,7 +2081,6 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, entry.fabricclk_mhz = max_fclk_mhz; entry.dram_speed_mts = 0; - get_optimal_ntuple(&entry); DC_FP_START(); insert_entry_into_table_sorted(table, num_entries, &entry); DC_FP_END(); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 66102db87265..7c60a954737b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -353,6 +353,29 @@ static float calculate_net_bw_in_kbytes_sec(struct _vcs_dpi_voltage_scaling_st * return limiting_bw_kbytes_sec; } +static void get_optimal_ntuple(struct _vcs_dpi_voltage_scaling_st *entry) +{ + if (entry->dcfclk_mhz > 0) { + float bw_on_sdp = entry->dcfclk_mhz * dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100); + + entry->fabricclk_mhz = bw_on_sdp / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100)); + entry->dram_speed_mts = bw_on_sdp / (dcn3_2_soc.num_chans * + dcn3_2_soc.dram_channel_width_bytes * ((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100)); + } else if (entry->fabricclk_mhz > 0) { + float bw_on_fabric = entry->fabricclk_mhz * dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100); + + entry->dcfclk_mhz = bw_on_fabric / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100)); + entry->dram_speed_mts = bw_on_fabric / (dcn3_2_soc.num_chans * + dcn3_2_soc.dram_channel_width_bytes * ((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100)); + } else if (entry->dram_speed_mts > 0) { + float bw_on_dram = entry->dram_speed_mts * dcn3_2_soc.num_chans * + dcn3_2_soc.dram_channel_width_bytes * ((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100); + + entry->fabricclk_mhz = bw_on_dram / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100)); + entry->dcfclk_mhz = bw_on_dram / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100)); + } +} + void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table, unsigned int 
*num_entries, struct _vcs_dpi_voltage_scaling_st *entry) @@ -363,6 +386,8 @@ void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table, dc_assert_fp_enabled(); + get_optimal_ntuple(entry); + if (*num_entries == 0) { table[0] = *entry; (*num_entries)++; -- cgit v1.2.3 From 4e14e0fc9f0e47d9d3d39b6f92724f2038ee6e25 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Fri, 8 Jul 2022 12:06:27 -0400 Subject: drm/amd/display: Move bounding box to FPU folder The final part of the DCN32 code that uses FPU is the bounding box code, and this commit moves it to dcn32_fpu. Tested-by: Daniel Wheeler Reviewed-by: Harry Wentland Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 460 +------------------- .../gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 470 +++++++++++++++++++++ .../gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h | 2 + 3 files changed, 474 insertions(+), 458 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index db4008016911..e551d2936d03 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -1922,29 +1922,6 @@ static struct dc_cap_funcs cap_funcs = { .get_dcc_compression_cap = dcn20_get_dcc_compression_cap }; - -static void dcn32_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts, - unsigned int *optimal_dcfclk, - unsigned int *optimal_fclk) -{ - double bw_from_dram, bw_from_dram1, bw_from_dram2; - - bw_from_dram1 = uclk_mts * dcn3_2_soc.num_chans * - dcn3_2_soc.dram_channel_width_bytes * (dcn3_2_soc.max_avg_dram_bw_use_normal_percent / 100); - bw_from_dram2 = uclk_mts * dcn3_2_soc.num_chans * - dcn3_2_soc.dram_channel_width_bytes * (dcn3_2_soc.max_avg_sdp_bw_use_normal_percent / 100); - - bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? 
bw_from_dram1 : bw_from_dram2; - - if (optimal_fclk) - *optimal_fclk = bw_from_dram / - (dcn3_2_soc.fabric_datapath_to_dcn_data_return_bytes * (dcn3_2_soc.max_avg_sdp_bw_use_normal_percent / 100)); - - if (optimal_dcfclk) - *optimal_dcfclk = bw_from_dram / - (dcn3_2_soc.return_bus_width_bytes * (dcn3_2_soc.max_avg_sdp_bw_use_normal_percent / 100)); -} - void dcn32_calculate_wm_and_dlg(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, int pipe_cnt, @@ -1955,444 +1932,11 @@ void dcn32_calculate_wm_and_dlg(struct dc *dc, struct dc_state *context, DC_FP_END(); } -static void remove_entry_from_table_at_index(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries, - unsigned int index) -{ - int i; - - if (*num_entries == 0) - return; - - for (i = index; i < *num_entries - 1; i++) { - table[i] = table[i + 1]; - } - memset(&table[--(*num_entries)], 0, sizeof(struct _vcs_dpi_voltage_scaling_st)); -} - -static int build_synthetic_soc_states(struct clk_bw_params *bw_params, - struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries) +static void dcn32_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) { - int i, j; - struct _vcs_dpi_voltage_scaling_st entry = {0}; - - unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, - max_phyclk_mhz = 0, max_dtbclk_mhz = 0, max_fclk_mhz = 0, max_uclk_mhz = 0; - - unsigned int min_dcfclk_mhz = 199, min_fclk_mhz = 299; - - static const unsigned int num_dcfclk_stas = 5; - unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 1324, 1564}; - - unsigned int num_uclk_dpms = 0; - unsigned int num_fclk_dpms = 0; - unsigned int num_dcfclk_dpms = 0; - - for (i = 0; i < MAX_NUM_DPM_LVL; i++) { - if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz) - max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; - if (bw_params->clk_table.entries[i].fclk_mhz > max_fclk_mhz) - max_fclk_mhz = bw_params->clk_table.entries[i].fclk_mhz; - 
if (bw_params->clk_table.entries[i].memclk_mhz > max_uclk_mhz) - max_uclk_mhz = bw_params->clk_table.entries[i].memclk_mhz; - if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz) - max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz; - if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz) - max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz; - if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz) - max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz; - if (bw_params->clk_table.entries[i].dtbclk_mhz > max_dtbclk_mhz) - max_dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; - - if (bw_params->clk_table.entries[i].memclk_mhz > 0) - num_uclk_dpms++; - if (bw_params->clk_table.entries[i].fclk_mhz > 0) - num_fclk_dpms++; - if (bw_params->clk_table.entries[i].dcfclk_mhz > 0) - num_dcfclk_dpms++; - } - - if (!max_dcfclk_mhz || !max_dispclk_mhz || !max_dtbclk_mhz) - return -1; - - if (max_dppclk_mhz == 0) - max_dppclk_mhz = max_dispclk_mhz; - - if (max_fclk_mhz == 0) - max_fclk_mhz = max_dcfclk_mhz * dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / dcn3_2_soc.pct_ideal_fabric_bw_after_urgent; - - if (max_phyclk_mhz == 0) - max_phyclk_mhz = dcn3_2_soc.clock_limits[0].phyclk_mhz; - - *num_entries = 0; - entry.dispclk_mhz = max_dispclk_mhz; - entry.dscclk_mhz = max_dispclk_mhz / 3; - entry.dppclk_mhz = max_dppclk_mhz; - entry.dtbclk_mhz = max_dtbclk_mhz; - entry.phyclk_mhz = max_phyclk_mhz; - entry.phyclk_d18_mhz = dcn3_2_soc.clock_limits[0].phyclk_d18_mhz; - entry.phyclk_d32_mhz = dcn3_2_soc.clock_limits[0].phyclk_d32_mhz; - - // Insert all the DCFCLK STAs - for (i = 0; i < num_dcfclk_stas; i++) { - entry.dcfclk_mhz = dcfclk_sta_targets[i]; - entry.fabricclk_mhz = 0; - entry.dram_speed_mts = 0; - - DC_FP_START(); - insert_entry_into_table_sorted(table, num_entries, &entry); - DC_FP_END(); - } - - // Insert the max DCFCLK - entry.dcfclk_mhz = max_dcfclk_mhz; - entry.fabricclk_mhz = 0; - entry.dram_speed_mts = 0; - 
DC_FP_START(); - insert_entry_into_table_sorted(table, num_entries, &entry); + dcn32_update_bw_bounding_box_fpu(dc, bw_params); DC_FP_END(); - - // Insert the UCLK DPMS - for (i = 0; i < num_uclk_dpms; i++) { - entry.dcfclk_mhz = 0; - entry.fabricclk_mhz = 0; - entry.dram_speed_mts = bw_params->clk_table.entries[i].memclk_mhz * 16; - - DC_FP_START(); - insert_entry_into_table_sorted(table, num_entries, &entry); - DC_FP_END(); - } - - // If FCLK is coarse grained, insert individual DPMs. - if (num_fclk_dpms > 2) { - for (i = 0; i < num_fclk_dpms; i++) { - entry.dcfclk_mhz = 0; - entry.fabricclk_mhz = bw_params->clk_table.entries[i].fclk_mhz; - entry.dram_speed_mts = 0; - - DC_FP_START(); - insert_entry_into_table_sorted(table, num_entries, &entry); - DC_FP_END(); - } - } - // If FCLK fine grained, only insert max - else { - entry.dcfclk_mhz = 0; - entry.fabricclk_mhz = max_fclk_mhz; - entry.dram_speed_mts = 0; - - DC_FP_START(); - insert_entry_into_table_sorted(table, num_entries, &entry); - DC_FP_END(); - } - - // At this point, the table contains all "points of interest" based on - // DPMs from PMFW, and STAs. Table is sorted by BW, and all clock - // ratios (by derate, are exact). - - // Remove states that require higher clocks than are supported - for (i = *num_entries - 1; i >= 0 ; i--) { - if (table[i].dcfclk_mhz > max_dcfclk_mhz || - table[i].fabricclk_mhz > max_fclk_mhz || - table[i].dram_speed_mts > max_uclk_mhz * 16) - remove_entry_from_table_at_index(table, num_entries, i); - } - - // At this point, the table only contains supported points of interest - // it could be used as is, but some states may be redundant due to - // coarse grained nature of some clocks, so we want to round up to - // coarse grained DPMs and remove duplicates. 
- - // Round up UCLKs - for (i = *num_entries - 1; i >= 0 ; i--) { - for (j = 0; j < num_uclk_dpms; j++) { - if (bw_params->clk_table.entries[j].memclk_mhz * 16 >= table[i].dram_speed_mts) { - table[i].dram_speed_mts = bw_params->clk_table.entries[j].memclk_mhz * 16; - break; - } - } - } - - // If FCLK is coarse grained, round up to next DPMs - if (num_fclk_dpms > 2) { - for (i = *num_entries - 1; i >= 0 ; i--) { - for (j = 0; j < num_fclk_dpms; j++) { - if (bw_params->clk_table.entries[j].fclk_mhz >= table[i].fabricclk_mhz) { - table[i].fabricclk_mhz = bw_params->clk_table.entries[j].fclk_mhz; - break; - } - } - } - } - // Otherwise, round up to minimum. - else { - for (i = *num_entries - 1; i >= 0 ; i--) { - if (table[i].fabricclk_mhz < min_fclk_mhz) { - table[i].fabricclk_mhz = min_fclk_mhz; - break; - } - } - } - - // Round DCFCLKs up to minimum - for (i = *num_entries - 1; i >= 0 ; i--) { - if (table[i].dcfclk_mhz < min_dcfclk_mhz) { - table[i].dcfclk_mhz = min_dcfclk_mhz; - break; - } - } - - // Remove duplicate states, note duplicate states are always neighbouring since table is sorted. 
- i = 0; - while (i < *num_entries - 1) { - if (table[i].dcfclk_mhz == table[i + 1].dcfclk_mhz && - table[i].fabricclk_mhz == table[i + 1].fabricclk_mhz && - table[i].dram_speed_mts == table[i + 1].dram_speed_mts) - remove_entry_from_table_at_index(table, num_entries, i + 1); - else - i++; - } - - // Fix up the state indicies - for (i = *num_entries - 1; i >= 0 ; i--) { - table[i].state = i; - } - - return 0; -} - -/* dcn32_update_bw_bounding_box - * This would override some dcn3_2 ip_or_soc initial parameters hardcoded from spreadsheet - * with actual values as per dGPU SKU: - * -with passed few options from dc->config - * -with dentist_vco_frequency from Clk Mgr (currently hardcoded, but might need to get it from PM FW) - * -with passed latency values (passed in ns units) in dc-> bb override for debugging purposes - * -with passed latencies from VBIOS (in 100_ns units) if available for certain dGPU SKU - * -with number of DRAM channels from VBIOS (which differ for certain dGPU SKU of the same ASIC) - * -clocks levels with passed clk_table entries from Clk Mgr as reported by PM FW for different - * clocks (which might differ for certain dGPU SKU of the same ASIC) - */ -static void dcn32_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) -{ - if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) { - - /* Overrides from dc->config options */ - dcn3_2_ip.clamp_min_dcfclk = dc->config.clamp_min_dcfclk; - - /* Override from passed dc->bb_overrides if available*/ - if ((int)(dcn3_2_soc.sr_exit_time_us * 1000) != dc->bb_overrides.sr_exit_time_ns - && dc->bb_overrides.sr_exit_time_ns) { - dcn3_2_soc.sr_exit_time_us = dc->bb_overrides.sr_exit_time_ns / 1000.0; - } - - if ((int)(dcn3_2_soc.sr_enter_plus_exit_time_us * 1000) - != dc->bb_overrides.sr_enter_plus_exit_time_ns - && dc->bb_overrides.sr_enter_plus_exit_time_ns) { - dcn3_2_soc.sr_enter_plus_exit_time_us = - dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0; - } - - if 
((int)(dcn3_2_soc.urgent_latency_us * 1000) != dc->bb_overrides.urgent_latency_ns - && dc->bb_overrides.urgent_latency_ns) { - dcn3_2_soc.urgent_latency_us = dc->bb_overrides.urgent_latency_ns / 1000.0; - } - - if ((int)(dcn3_2_soc.dram_clock_change_latency_us * 1000) - != dc->bb_overrides.dram_clock_change_latency_ns - && dc->bb_overrides.dram_clock_change_latency_ns) { - dcn3_2_soc.dram_clock_change_latency_us = - dc->bb_overrides.dram_clock_change_latency_ns / 1000.0; - } - - if ((int)(dcn3_2_soc.dummy_pstate_latency_us * 1000) - != dc->bb_overrides.dummy_clock_change_latency_ns - && dc->bb_overrides.dummy_clock_change_latency_ns) { - dcn3_2_soc.dummy_pstate_latency_us = - dc->bb_overrides.dummy_clock_change_latency_ns / 1000.0; - } - - /* Override from VBIOS if VBIOS bb_info available */ - if (dc->ctx->dc_bios->funcs->get_soc_bb_info) { - struct bp_soc_bb_info bb_info = {0}; - - if (dc->ctx->dc_bios->funcs->get_soc_bb_info(dc->ctx->dc_bios, &bb_info) == BP_RESULT_OK) { - if (bb_info.dram_clock_change_latency_100ns > 0) - dcn3_2_soc.dram_clock_change_latency_us = bb_info.dram_clock_change_latency_100ns * 10; - - if (bb_info.dram_sr_enter_exit_latency_100ns > 0) - dcn3_2_soc.sr_enter_plus_exit_time_us = bb_info.dram_sr_enter_exit_latency_100ns * 10; - - if (bb_info.dram_sr_exit_latency_100ns > 0) - dcn3_2_soc.sr_exit_time_us = bb_info.dram_sr_exit_latency_100ns * 10; - } - } - - /* Override from VBIOS for num_chan */ - if (dc->ctx->dc_bios->vram_info.num_chans) - dcn3_2_soc.num_chans = dc->ctx->dc_bios->vram_info.num_chans; - - if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes) - dcn3_2_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes; - - } - - /* Override dispclk_dppclk_vco_speed_mhz from Clk Mgr */ - dcn3_2_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; - dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; - - /* Overrides Clock levelsfrom CLK Mgr table 
entries as reported by PM FW */ - if ((!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) && (bw_params->clk_table.entries[0].memclk_mhz)) { - if (dc->debug.use_legacy_soc_bb_mechanism) { - unsigned int i = 0, j = 0, num_states = 0; - - unsigned int dcfclk_mhz[DC__VOLTAGE_STATES] = {0}; - unsigned int dram_speed_mts[DC__VOLTAGE_STATES] = {0}; - unsigned int optimal_uclk_for_dcfclk_sta_targets[DC__VOLTAGE_STATES] = {0}; - unsigned int optimal_dcfclk_for_uclk[DC__VOLTAGE_STATES] = {0}; - unsigned int min_dcfclk = UINT_MAX; - /* Set 199 as first value in STA target array to have a minimum DCFCLK value. - * For DCN32 we set min to 199 so minimum FCLK DPM0 (300Mhz can be achieved) */ - unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 1324, 1564}; - unsigned int num_dcfclk_sta_targets = 4, num_uclk_states = 0; - unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0; - - for (i = 0; i < MAX_NUM_DPM_LVL; i++) { - if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz) - max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; - if (bw_params->clk_table.entries[i].dcfclk_mhz != 0 && - bw_params->clk_table.entries[i].dcfclk_mhz < min_dcfclk) - min_dcfclk = bw_params->clk_table.entries[i].dcfclk_mhz; - if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz) - max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz; - if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz) - max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz; - if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz) - max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz; - } - if (min_dcfclk > dcfclk_sta_targets[0]) - dcfclk_sta_targets[0] = min_dcfclk; - if (!max_dcfclk_mhz) - max_dcfclk_mhz = dcn3_2_soc.clock_limits[0].dcfclk_mhz; - if (!max_dispclk_mhz) - max_dispclk_mhz = dcn3_2_soc.clock_limits[0].dispclk_mhz; - if (!max_dppclk_mhz) - max_dppclk_mhz = 
dcn3_2_soc.clock_limits[0].dppclk_mhz; - if (!max_phyclk_mhz) - max_phyclk_mhz = dcn3_2_soc.clock_limits[0].phyclk_mhz; - - if (max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { - // If max DCFCLK is greater than the max DCFCLK STA target, insert into the DCFCLK STA target array - dcfclk_sta_targets[num_dcfclk_sta_targets] = max_dcfclk_mhz; - num_dcfclk_sta_targets++; - } else if (max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { - // If max DCFCLK is less than the max DCFCLK STA target, cap values and remove duplicates - for (i = 0; i < num_dcfclk_sta_targets; i++) { - if (dcfclk_sta_targets[i] > max_dcfclk_mhz) { - dcfclk_sta_targets[i] = max_dcfclk_mhz; - break; - } - } - // Update size of array since we "removed" duplicates - num_dcfclk_sta_targets = i + 1; - } - - num_uclk_states = bw_params->clk_table.num_entries; - - // Calculate optimal dcfclk for each uclk - for (i = 0; i < num_uclk_states; i++) { - dcn32_get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16, - &optimal_dcfclk_for_uclk[i], NULL); - if (optimal_dcfclk_for_uclk[i] < bw_params->clk_table.entries[0].dcfclk_mhz) { - optimal_dcfclk_for_uclk[i] = bw_params->clk_table.entries[0].dcfclk_mhz; - } - } - - // Calculate optimal uclk for each dcfclk sta target - for (i = 0; i < num_dcfclk_sta_targets; i++) { - for (j = 0; j < num_uclk_states; j++) { - if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) { - optimal_uclk_for_dcfclk_sta_targets[i] = - bw_params->clk_table.entries[j].memclk_mhz * 16; - break; - } - } - } - - i = 0; - j = 0; - // create the final dcfclk and uclk table - while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) { - if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j] && i < num_dcfclk_sta_targets) { - dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; - dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; - } else { - if (j < num_uclk_states && 
optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) { - dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j]; - dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; - } else { - j = num_uclk_states; - } - } - } - - while (i < num_dcfclk_sta_targets && num_states < DC__VOLTAGE_STATES) { - dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; - dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; - } - - while (j < num_uclk_states && num_states < DC__VOLTAGE_STATES && - optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) { - dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j]; - dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; - } - - dcn3_2_soc.num_states = num_states; - for (i = 0; i < dcn3_2_soc.num_states; i++) { - dcn3_2_soc.clock_limits[i].state = i; - dcn3_2_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i]; - dcn3_2_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i]; - - /* Fill all states with max values of all these clocks */ - dcn3_2_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz; - dcn3_2_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz; - dcn3_2_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz; - dcn3_2_soc.clock_limits[i].dscclk_mhz = max_dispclk_mhz / 3; - - /* Populate from bw_params for DTBCLK, SOCCLK */ - if (i > 0) { - if (!bw_params->clk_table.entries[i].dtbclk_mhz) { - dcn3_2_soc.clock_limits[i].dtbclk_mhz = dcn3_2_soc.clock_limits[i-1].dtbclk_mhz; - } else { - dcn3_2_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; - } - } else if (bw_params->clk_table.entries[i].dtbclk_mhz) { - dcn3_2_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; - } - - if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0) - dcn3_2_soc.clock_limits[i].socclk_mhz = dcn3_2_soc.clock_limits[i-1].socclk_mhz; - else - dcn3_2_soc.clock_limits[i].socclk_mhz = bw_params->clk_table.entries[i].socclk_mhz; - - if (!dram_speed_mts[i] && i > 0) - 
dcn3_2_soc.clock_limits[i].dram_speed_mts = dcn3_2_soc.clock_limits[i-1].dram_speed_mts; - else - dcn3_2_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i]; - - /* These clocks cannot come from bw_params, always fill from dcn3_2_soc[0] */ - /* PHYCLK_D18, PHYCLK_D32 */ - dcn3_2_soc.clock_limits[i].phyclk_d18_mhz = dcn3_2_soc.clock_limits[0].phyclk_d18_mhz; - dcn3_2_soc.clock_limits[i].phyclk_d32_mhz = dcn3_2_soc.clock_limits[0].phyclk_d32_mhz; - } - } else { - build_synthetic_soc_states(bw_params, dcn3_2_soc.clock_limits, &dcn3_2_soc.num_states); - } - - /* Re-init DML with updated bb */ - dml_init_instance(&dc->dml, &dcn3_2_soc, &dcn3_2_ip, DML_PROJECT_DCN32); - if (dc->current_state) - dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_2_soc, &dcn3_2_ip, DML_PROJECT_DCN32); - } } static struct resource_funcs dcn32_res_pool_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 7c60a954737b..9175fe1f9be3 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -1772,3 +1772,473 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, } +static void dcn32_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts, + unsigned int *optimal_dcfclk, + unsigned int *optimal_fclk) +{ + double bw_from_dram, bw_from_dram1, bw_from_dram2; + + bw_from_dram1 = uclk_mts * dcn3_2_soc.num_chans * + dcn3_2_soc.dram_channel_width_bytes * (dcn3_2_soc.max_avg_dram_bw_use_normal_percent / 100); + bw_from_dram2 = uclk_mts * dcn3_2_soc.num_chans * + dcn3_2_soc.dram_channel_width_bytes * (dcn3_2_soc.max_avg_sdp_bw_use_normal_percent / 100); + + bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? 
bw_from_dram1 : bw_from_dram2; + + if (optimal_fclk) + *optimal_fclk = bw_from_dram / + (dcn3_2_soc.fabric_datapath_to_dcn_data_return_bytes * (dcn3_2_soc.max_avg_sdp_bw_use_normal_percent / 100)); + + if (optimal_dcfclk) + *optimal_dcfclk = bw_from_dram / + (dcn3_2_soc.return_bus_width_bytes * (dcn3_2_soc.max_avg_sdp_bw_use_normal_percent / 100)); +} + +static void remove_entry_from_table_at_index(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries, + unsigned int index) +{ + int i; + + if (*num_entries == 0) + return; + + for (i = index; i < *num_entries - 1; i++) { + table[i] = table[i + 1]; + } + memset(&table[--(*num_entries)], 0, sizeof(struct _vcs_dpi_voltage_scaling_st)); +} + +static int build_synthetic_soc_states(struct clk_bw_params *bw_params, + struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries) +{ + int i, j; + struct _vcs_dpi_voltage_scaling_st entry = {0}; + + unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, + max_phyclk_mhz = 0, max_dtbclk_mhz = 0, max_fclk_mhz = 0, max_uclk_mhz = 0; + + unsigned int min_dcfclk_mhz = 199, min_fclk_mhz = 299; + + static const unsigned int num_dcfclk_stas = 5; + unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 1324, 1564}; + + unsigned int num_uclk_dpms = 0; + unsigned int num_fclk_dpms = 0; + unsigned int num_dcfclk_dpms = 0; + + for (i = 0; i < MAX_NUM_DPM_LVL; i++) { + if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz) + max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; + if (bw_params->clk_table.entries[i].fclk_mhz > max_fclk_mhz) + max_fclk_mhz = bw_params->clk_table.entries[i].fclk_mhz; + if (bw_params->clk_table.entries[i].memclk_mhz > max_uclk_mhz) + max_uclk_mhz = bw_params->clk_table.entries[i].memclk_mhz; + if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz) + max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz; + if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz) 
+ max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz; + if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz) + max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz; + if (bw_params->clk_table.entries[i].dtbclk_mhz > max_dtbclk_mhz) + max_dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; + + if (bw_params->clk_table.entries[i].memclk_mhz > 0) + num_uclk_dpms++; + if (bw_params->clk_table.entries[i].fclk_mhz > 0) + num_fclk_dpms++; + if (bw_params->clk_table.entries[i].dcfclk_mhz > 0) + num_dcfclk_dpms++; + } + + if (!max_dcfclk_mhz || !max_dispclk_mhz || !max_dtbclk_mhz) + return -1; + + if (max_dppclk_mhz == 0) + max_dppclk_mhz = max_dispclk_mhz; + + if (max_fclk_mhz == 0) + max_fclk_mhz = max_dcfclk_mhz * dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / dcn3_2_soc.pct_ideal_fabric_bw_after_urgent; + + if (max_phyclk_mhz == 0) + max_phyclk_mhz = dcn3_2_soc.clock_limits[0].phyclk_mhz; + + *num_entries = 0; + entry.dispclk_mhz = max_dispclk_mhz; + entry.dscclk_mhz = max_dispclk_mhz / 3; + entry.dppclk_mhz = max_dppclk_mhz; + entry.dtbclk_mhz = max_dtbclk_mhz; + entry.phyclk_mhz = max_phyclk_mhz; + entry.phyclk_d18_mhz = dcn3_2_soc.clock_limits[0].phyclk_d18_mhz; + entry.phyclk_d32_mhz = dcn3_2_soc.clock_limits[0].phyclk_d32_mhz; + + // Insert all the DCFCLK STAs + for (i = 0; i < num_dcfclk_stas; i++) { + entry.dcfclk_mhz = dcfclk_sta_targets[i]; + entry.fabricclk_mhz = 0; + entry.dram_speed_mts = 0; + + DC_FP_START(); + insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_END(); + } + + // Insert the max DCFCLK + entry.dcfclk_mhz = max_dcfclk_mhz; + entry.fabricclk_mhz = 0; + entry.dram_speed_mts = 0; + + DC_FP_START(); + insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_END(); + + // Insert the UCLK DPMS + for (i = 0; i < num_uclk_dpms; i++) { + entry.dcfclk_mhz = 0; + entry.fabricclk_mhz = 0; + entry.dram_speed_mts = bw_params->clk_table.entries[i].memclk_mhz * 16; + + DC_FP_START(); + 
insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_END(); + } + + // If FCLK is coarse grained, insert individual DPMs. + if (num_fclk_dpms > 2) { + for (i = 0; i < num_fclk_dpms; i++) { + entry.dcfclk_mhz = 0; + entry.fabricclk_mhz = bw_params->clk_table.entries[i].fclk_mhz; + entry.dram_speed_mts = 0; + + DC_FP_START(); + insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_END(); + } + } + // If FCLK fine grained, only insert max + else { + entry.dcfclk_mhz = 0; + entry.fabricclk_mhz = max_fclk_mhz; + entry.dram_speed_mts = 0; + + DC_FP_START(); + insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_END(); + } + + // At this point, the table contains all "points of interest" based on + // DPMs from PMFW, and STAs. Table is sorted by BW, and all clock + // ratios (by derate, are exact). + + // Remove states that require higher clocks than are supported + for (i = *num_entries - 1; i >= 0 ; i--) { + if (table[i].dcfclk_mhz > max_dcfclk_mhz || + table[i].fabricclk_mhz > max_fclk_mhz || + table[i].dram_speed_mts > max_uclk_mhz * 16) + remove_entry_from_table_at_index(table, num_entries, i); + } + + // At this point, the table only contains supported points of interest + // it could be used as is, but some states may be redundant due to + // coarse grained nature of some clocks, so we want to round up to + // coarse grained DPMs and remove duplicates. 
+ + // Round up UCLKs + for (i = *num_entries - 1; i >= 0 ; i--) { + for (j = 0; j < num_uclk_dpms; j++) { + if (bw_params->clk_table.entries[j].memclk_mhz * 16 >= table[i].dram_speed_mts) { + table[i].dram_speed_mts = bw_params->clk_table.entries[j].memclk_mhz * 16; + break; + } + } + } + + // If FCLK is coarse grained, round up to next DPMs + if (num_fclk_dpms > 2) { + for (i = *num_entries - 1; i >= 0 ; i--) { + for (j = 0; j < num_fclk_dpms; j++) { + if (bw_params->clk_table.entries[j].fclk_mhz >= table[i].fabricclk_mhz) { + table[i].fabricclk_mhz = bw_params->clk_table.entries[j].fclk_mhz; + break; + } + } + } + } + // Otherwise, round up to minimum. + else { + for (i = *num_entries - 1; i >= 0 ; i--) { + if (table[i].fabricclk_mhz < min_fclk_mhz) { + table[i].fabricclk_mhz = min_fclk_mhz; + break; + } + } + } + + // Round DCFCLKs up to minimum + for (i = *num_entries - 1; i >= 0 ; i--) { + if (table[i].dcfclk_mhz < min_dcfclk_mhz) { + table[i].dcfclk_mhz = min_dcfclk_mhz; + break; + } + } + + // Remove duplicate states, note duplicate states are always neighbouring since table is sorted. 
+ i = 0; + while (i < *num_entries - 1) { + if (table[i].dcfclk_mhz == table[i + 1].dcfclk_mhz && + table[i].fabricclk_mhz == table[i + 1].fabricclk_mhz && + table[i].dram_speed_mts == table[i + 1].dram_speed_mts) + remove_entry_from_table_at_index(table, num_entries, i + 1); + else + i++; + } + + // Fix up the state indicies + for (i = *num_entries - 1; i >= 0 ; i--) { + table[i].state = i; + } + + return 0; +} + +/** + * dcn32_update_bw_bounding_box + * + * This would override some dcn3_2 ip_or_soc initial parameters hardcoded from + * spreadsheet with actual values as per dGPU SKU: + * - with passed few options from dc->config + * - with dentist_vco_frequency from Clk Mgr (currently hardcoded, but might + * need to get it from PM FW) + * - with passed latency values (passed in ns units) in dc-> bb override for + * debugging purposes + * - with passed latencies from VBIOS (in 100_ns units) if available for + * certain dGPU SKU + * - with number of DRAM channels from VBIOS (which differ for certain dGPU SKU + * of the same ASIC) + * - clocks levels with passed clk_table entries from Clk Mgr as reported by PM + * FW for different clocks (which might differ for certain dGPU SKU of the + * same ASIC) + */ +void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params) +{ + dc_assert_fp_enabled(); + + if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) { + /* Overrides from dc->config options */ + dcn3_2_ip.clamp_min_dcfclk = dc->config.clamp_min_dcfclk; + + /* Override from passed dc->bb_overrides if available*/ + if ((int)(dcn3_2_soc.sr_exit_time_us * 1000) != dc->bb_overrides.sr_exit_time_ns + && dc->bb_overrides.sr_exit_time_ns) { + dcn3_2_soc.sr_exit_time_us = dc->bb_overrides.sr_exit_time_ns / 1000.0; + } + + if ((int)(dcn3_2_soc.sr_enter_plus_exit_time_us * 1000) + != dc->bb_overrides.sr_enter_plus_exit_time_ns + && dc->bb_overrides.sr_enter_plus_exit_time_ns) { + dcn3_2_soc.sr_enter_plus_exit_time_us = + 
dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0; + } + + if ((int)(dcn3_2_soc.urgent_latency_us * 1000) != dc->bb_overrides.urgent_latency_ns + && dc->bb_overrides.urgent_latency_ns) { + dcn3_2_soc.urgent_latency_us = dc->bb_overrides.urgent_latency_ns / 1000.0; + } + + if ((int)(dcn3_2_soc.dram_clock_change_latency_us * 1000) + != dc->bb_overrides.dram_clock_change_latency_ns + && dc->bb_overrides.dram_clock_change_latency_ns) { + dcn3_2_soc.dram_clock_change_latency_us = + dc->bb_overrides.dram_clock_change_latency_ns / 1000.0; + } + + if ((int)(dcn3_2_soc.dummy_pstate_latency_us * 1000) + != dc->bb_overrides.dummy_clock_change_latency_ns + && dc->bb_overrides.dummy_clock_change_latency_ns) { + dcn3_2_soc.dummy_pstate_latency_us = + dc->bb_overrides.dummy_clock_change_latency_ns / 1000.0; + } + + /* Override from VBIOS if VBIOS bb_info available */ + if (dc->ctx->dc_bios->funcs->get_soc_bb_info) { + struct bp_soc_bb_info bb_info = {0}; + + if (dc->ctx->dc_bios->funcs->get_soc_bb_info(dc->ctx->dc_bios, &bb_info) == BP_RESULT_OK) { + if (bb_info.dram_clock_change_latency_100ns > 0) + dcn3_2_soc.dram_clock_change_latency_us = bb_info.dram_clock_change_latency_100ns * 10; + + if (bb_info.dram_sr_enter_exit_latency_100ns > 0) + dcn3_2_soc.sr_enter_plus_exit_time_us = bb_info.dram_sr_enter_exit_latency_100ns * 10; + + if (bb_info.dram_sr_exit_latency_100ns > 0) + dcn3_2_soc.sr_exit_time_us = bb_info.dram_sr_exit_latency_100ns * 10; + } + } + + /* Override from VBIOS for num_chan */ + if (dc->ctx->dc_bios->vram_info.num_chans) + dcn3_2_soc.num_chans = dc->ctx->dc_bios->vram_info.num_chans; + + if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes) + dcn3_2_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes; + + } + + /* Override dispclk_dppclk_vco_speed_mhz from Clk Mgr */ + dcn3_2_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; + dc->dml.soc.dispclk_dppclk_vco_speed_mhz = 
dc->clk_mgr->dentist_vco_freq_khz / 1000.0; + + /* Overrides Clock levelsfrom CLK Mgr table entries as reported by PM FW */ + if ((!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) && (bw_params->clk_table.entries[0].memclk_mhz)) { + if (dc->debug.use_legacy_soc_bb_mechanism) { + unsigned int i = 0, j = 0, num_states = 0; + + unsigned int dcfclk_mhz[DC__VOLTAGE_STATES] = {0}; + unsigned int dram_speed_mts[DC__VOLTAGE_STATES] = {0}; + unsigned int optimal_uclk_for_dcfclk_sta_targets[DC__VOLTAGE_STATES] = {0}; + unsigned int optimal_dcfclk_for_uclk[DC__VOLTAGE_STATES] = {0}; + unsigned int min_dcfclk = UINT_MAX; + /* Set 199 as first value in STA target array to have a minimum DCFCLK value. + * For DCN32 we set min to 199 so minimum FCLK DPM0 (300Mhz can be achieved) */ + unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 1324, 1564}; + unsigned int num_dcfclk_sta_targets = 4, num_uclk_states = 0; + unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0; + + for (i = 0; i < MAX_NUM_DPM_LVL; i++) { + if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz) + max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; + if (bw_params->clk_table.entries[i].dcfclk_mhz != 0 && + bw_params->clk_table.entries[i].dcfclk_mhz < min_dcfclk) + min_dcfclk = bw_params->clk_table.entries[i].dcfclk_mhz; + if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz) + max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz; + if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz) + max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz; + if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz) + max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz; + } + if (min_dcfclk > dcfclk_sta_targets[0]) + dcfclk_sta_targets[0] = min_dcfclk; + if (!max_dcfclk_mhz) + max_dcfclk_mhz = dcn3_2_soc.clock_limits[0].dcfclk_mhz; + if (!max_dispclk_mhz) + max_dispclk_mhz = 
dcn3_2_soc.clock_limits[0].dispclk_mhz; + if (!max_dppclk_mhz) + max_dppclk_mhz = dcn3_2_soc.clock_limits[0].dppclk_mhz; + if (!max_phyclk_mhz) + max_phyclk_mhz = dcn3_2_soc.clock_limits[0].phyclk_mhz; + + if (max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { + // If max DCFCLK is greater than the max DCFCLK STA target, insert into the DCFCLK STA target array + dcfclk_sta_targets[num_dcfclk_sta_targets] = max_dcfclk_mhz; + num_dcfclk_sta_targets++; + } else if (max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { + // If max DCFCLK is less than the max DCFCLK STA target, cap values and remove duplicates + for (i = 0; i < num_dcfclk_sta_targets; i++) { + if (dcfclk_sta_targets[i] > max_dcfclk_mhz) { + dcfclk_sta_targets[i] = max_dcfclk_mhz; + break; + } + } + // Update size of array since we "removed" duplicates + num_dcfclk_sta_targets = i + 1; + } + + num_uclk_states = bw_params->clk_table.num_entries; + + // Calculate optimal dcfclk for each uclk + for (i = 0; i < num_uclk_states; i++) { + dcn32_get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16, + &optimal_dcfclk_for_uclk[i], NULL); + if (optimal_dcfclk_for_uclk[i] < bw_params->clk_table.entries[0].dcfclk_mhz) { + optimal_dcfclk_for_uclk[i] = bw_params->clk_table.entries[0].dcfclk_mhz; + } + } + + // Calculate optimal uclk for each dcfclk sta target + for (i = 0; i < num_dcfclk_sta_targets; i++) { + for (j = 0; j < num_uclk_states; j++) { + if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) { + optimal_uclk_for_dcfclk_sta_targets[i] = + bw_params->clk_table.entries[j].memclk_mhz * 16; + break; + } + } + } + + i = 0; + j = 0; + // create the final dcfclk and uclk table + while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) { + if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j] && i < num_dcfclk_sta_targets) { + dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; + dram_speed_mts[num_states++] = 
optimal_uclk_for_dcfclk_sta_targets[i++]; + } else { + if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) { + dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j]; + dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; + } else { + j = num_uclk_states; + } + } + } + + while (i < num_dcfclk_sta_targets && num_states < DC__VOLTAGE_STATES) { + dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; + dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; + } + + while (j < num_uclk_states && num_states < DC__VOLTAGE_STATES && + optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) { + dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j]; + dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; + } + + dcn3_2_soc.num_states = num_states; + for (i = 0; i < dcn3_2_soc.num_states; i++) { + dcn3_2_soc.clock_limits[i].state = i; + dcn3_2_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i]; + dcn3_2_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i]; + + /* Fill all states with max values of all these clocks */ + dcn3_2_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz; + dcn3_2_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz; + dcn3_2_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz; + dcn3_2_soc.clock_limits[i].dscclk_mhz = max_dispclk_mhz / 3; + + /* Populate from bw_params for DTBCLK, SOCCLK */ + if (i > 0) { + if (!bw_params->clk_table.entries[i].dtbclk_mhz) { + dcn3_2_soc.clock_limits[i].dtbclk_mhz = dcn3_2_soc.clock_limits[i-1].dtbclk_mhz; + } else { + dcn3_2_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; + } + } else if (bw_params->clk_table.entries[i].dtbclk_mhz) { + dcn3_2_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; + } + + if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0) + dcn3_2_soc.clock_limits[i].socclk_mhz = dcn3_2_soc.clock_limits[i-1].socclk_mhz; + else + dcn3_2_soc.clock_limits[i].socclk_mhz = 
bw_params->clk_table.entries[i].socclk_mhz; + + if (!dram_speed_mts[i] && i > 0) + dcn3_2_soc.clock_limits[i].dram_speed_mts = dcn3_2_soc.clock_limits[i-1].dram_speed_mts; + else + dcn3_2_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i]; + + /* These clocks cannot come from bw_params, always fill from dcn3_2_soc[0] */ + /* PHYCLK_D18, PHYCLK_D32 */ + dcn3_2_soc.clock_limits[i].phyclk_d18_mhz = dcn3_2_soc.clock_limits[0].phyclk_d18_mhz; + dcn3_2_soc.clock_limits[i].phyclk_d32_mhz = dcn3_2_soc.clock_limits[0].phyclk_d32_mhz; + } + } else { + build_synthetic_soc_states(bw_params, dcn3_2_soc.clock_limits, &dcn3_2_soc.num_states); + } + + /* Re-init DML with updated bb */ + dml_init_instance(&dc->dml, &dcn3_2_soc, &dcn3_2_ip, DML_PROJECT_DCN32); + if (dc->current_state) + dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_2_soc, &dcn3_2_ip, DML_PROJECT_DCN32); + } +} + diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h index 56973debc348..3ed06ab855be 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h @@ -69,4 +69,6 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, int pipe_cnt, int vlevel); +void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params); + #endif -- cgit v1.2.3 From f7907f6db81ca08f4f8bd9245b8fecd49bc82f6e Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Fri, 8 Jul 2022 12:08:34 -0400 Subject: drm/amd/display: Drop FPU flags from dcn32 Makefile This is the final commit from the FPU isolation for DCN32 and for this reason we can finally remove flags related to FPU. 
Tested-by: Daniel Wheeler Reviewed-by: Harry Wentland Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/Makefile | 28 --------------------------- 1 file changed, 28 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/Makefile b/drivers/gpu/drm/amd/display/dc/dcn32/Makefile index 932d85fa4262..e943b643ab6b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn32/Makefile @@ -15,34 +15,6 @@ DCN32 = dcn32_resource.o dcn32_hubbub.o dcn32_hwseq.o dcn32_init.o \ dcn32_dio_stream_encoder.o dcn32_dio_link_encoder.o dcn32_hpo_dp_link_encoder.o \ dcn32_resource_helpers.o dcn32_mpc.o -ifdef CONFIG_X86 -dcn32_ccflags := -mhard-float -msse -endif - -ifdef CONFIG_PPC64 -dcn32_ccflags := -mhard-float -maltivec -endif - -ifdef CONFIG_CC_IS_GCC -ifeq ($(call cc-ifversion, -lt, 0701, y), y) -IS_OLD_GCC = 1 -endif -endif - -ifdef CONFIG_X86 -ifdef IS_OLD_GCC -# Stack alignment mismatch, proceed with caution. -# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 -# (8B stack alignment). -dcn32_ccflags += -mpreferred-stack-boundary=4 -else -dcn32_ccflags += -msse2 -endif -endif - -CFLAGS_$(AMDDALPATH)/dc/dcn32/dcn32_resource_helpers.o := $(dcn32_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dcn32/dcn32_resource.o := $(dcn32_ccflags) - AMD_DAL_DCN32 = $(addprefix $(AMDDALPATH)/dc/dcn32/,$(DCN32)) AMD_DISPLAY_FILES += $(AMD_DAL_DCN32) -- cgit v1.2.3 From 197485c695430ab03d9f4a96aab4d4f76787f7c9 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Fri, 8 Jul 2022 15:53:56 -0400 Subject: drm/amd/display: Create dcn321_fpu file The file dcn321_resource has a lot of FPU operations that should be inside the dml folder. This commit introduces the dcn321_fpu file and moves some of the FPU operation functions to this new file. 
Tested-by: Daniel Wheeler Reviewed-by: Harry Wentland Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn321/dcn321_resource.c | 225 ++----------------- .../drm/amd/display/dc/dcn321/dcn321_resource.h | 3 + drivers/gpu/drm/amd/display/dc/dml/Makefile | 2 + .../gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c | 238 +++++++++++++++++++++ .../gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h | 36 ++++ 5 files changed, 296 insertions(+), 208 deletions(-) create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c index d218c6dd71aa..6619bcb30de7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c @@ -37,6 +37,8 @@ #include "dcn20/dcn20_resource.h" #include "dcn30/dcn30_resource.h" +#include "dml/dcn321/dcn321_fpu.h" + #include "dcn10/dcn10_ipp.h" #include "dcn30/dcn30_hubbub.h" #include "dcn31/dcn31_hubbub.h" @@ -120,134 +122,6 @@ static const struct IP_BASE DCN_BASE = { { { { 0x00000012, 0x000000C0, 0x000034C #define fixed16_to_double(x) (((double)x) / ((double) (1 << 16))) #define fixed16_to_double_to_cpu(x) fixed16_to_double(le32_to_cpu(x)) -#define DCN3_2_DEFAULT_DET_SIZE 256 - -struct _vcs_dpi_ip_params_st dcn3_21_ip = { - .gpuvm_enable = 0, - .gpuvm_max_page_table_levels = 4, - .hostvm_enable = 0, - .rob_buffer_size_kbytes = 128, - .det_buffer_size_kbytes = DCN3_2_DEFAULT_DET_SIZE, - .config_return_buffer_size_in_kbytes = 1280, - .compressed_buffer_segment_size_in_kbytes = 64, - .meta_fifo_size_in_kentries = 22, - .zero_size_buffer_entries = 512, - .compbuf_reserved_space_64b = 256, - .compbuf_reserved_space_zs = 64, - .dpp_output_buffer_pixels = 2560, - .opp_output_buffer_lines = 1, - .pixel_chunk_size_kbytes = 8, - 
.alpha_pixel_chunk_size_kbytes = 4, // not appearing in spreadsheet, match c code from hw team - .min_pixel_chunk_size_bytes = 1024, - .dcc_meta_buffer_size_bytes = 6272, - .meta_chunk_size_kbytes = 2, - .min_meta_chunk_size_bytes = 256, - .writeback_chunk_size_kbytes = 8, - .ptoi_supported = false, - .num_dsc = 4, - .maximum_dsc_bits_per_component = 12, - .maximum_pixels_per_line_per_dsc_unit = 6016, - .dsc422_native_support = true, - .is_line_buffer_bpp_fixed = true, - .line_buffer_fixed_bpp = 57, - .line_buffer_size_bits = 1171920, //DPP doc, DCN3_2_DisplayMode_73.xlsm still shows as 986880 bits with 48 bpp - .max_line_buffer_lines = 32, - .writeback_interface_buffer_size_kbytes = 90, - .max_num_dpp = 4, - .max_num_otg = 4, - .max_num_hdmi_frl_outputs = 1, - .max_num_wb = 1, - .max_dchub_pscl_bw_pix_per_clk = 4, - .max_pscl_lb_bw_pix_per_clk = 2, - .max_lb_vscl_bw_pix_per_clk = 4, - .max_vscl_hscl_bw_pix_per_clk = 4, - .max_hscl_ratio = 6, - .max_vscl_ratio = 6, - .max_hscl_taps = 8, - .max_vscl_taps = 8, - .dpte_buffer_size_in_pte_reqs_luma = 64, - .dpte_buffer_size_in_pte_reqs_chroma = 34, - .dispclk_ramp_margin_percent = 1, - .max_inter_dcn_tile_repeaters = 8, - .cursor_buffer_size = 16, - .cursor_chunk_size = 2, - .writeback_line_buffer_buffer_size = 0, - .writeback_min_hscl_ratio = 1, - .writeback_min_vscl_ratio = 1, - .writeback_max_hscl_ratio = 1, - .writeback_max_vscl_ratio = 1, - .writeback_max_hscl_taps = 1, - .writeback_max_vscl_taps = 1, - .dppclk_delay_subtotal = 47, - .dppclk_delay_scl = 50, - .dppclk_delay_scl_lb_only = 16, - .dppclk_delay_cnvc_formatter = 28, - .dppclk_delay_cnvc_cursor = 6, - .dispclk_delay_subtotal = 125, - .dynamic_metadata_vm_enabled = false, - .odm_combine_4to1_supported = false, - .dcc_supported = true, - .max_num_dp2p0_outputs = 2, - .max_num_dp2p0_streams = 4, -}; - -struct _vcs_dpi_soc_bounding_box_st dcn3_21_soc = { - .clock_limits = { - { - .state = 0, - .dcfclk_mhz = 1564.0, - .fabricclk_mhz = 400.0, - .dispclk_mhz = 
2150.0, - .dppclk_mhz = 2150.0, - .phyclk_mhz = 810.0, - .phyclk_d18_mhz = 667.0, - .phyclk_d32_mhz = 625.0, - .socclk_mhz = 1200.0, - .dscclk_mhz = 716.667, - .dram_speed_mts = 1600.0, - .dtbclk_mhz = 1564.0, - }, - }, - .num_states = 1, - .sr_exit_time_us = 5.20, - .sr_enter_plus_exit_time_us = 9.60, - .sr_exit_z8_time_us = 285.0, - .sr_enter_plus_exit_z8_time_us = 320, - .writeback_latency_us = 12.0, - .round_trip_ping_latency_dcfclk_cycles = 263, - .urgent_latency_pixel_data_only_us = 4.0, - .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, - .urgent_latency_vm_data_only_us = 4.0, - .fclk_change_latency_us = 20, - .usr_retraining_latency_us = 2, - .smn_latency_us = 2, - .mall_allocated_for_dcn_mbytes = 64, - .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, - .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, - .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, - .pct_ideal_sdp_bw_after_urgent = 100.0, - .pct_ideal_fabric_bw_after_urgent = 67.0, - .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 20.0, - .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, // N/A, for now keep as is until DML implemented - .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, // N/A, for now keep as is until DML implemented - .pct_ideal_dram_bw_after_urgent_strobe = 67.0, - .max_avg_sdp_bw_use_normal_percent = 80.0, - .max_avg_fabric_bw_use_normal_percent = 60.0, - .max_avg_dram_bw_use_normal_strobe_percent = 50.0, - .max_avg_dram_bw_use_normal_percent = 15.0, - .num_chans = 8, - .dram_channel_width_bytes = 2, - .fabric_datapath_to_dcn_data_return_bytes = 64, - .return_bus_width_bytes = 64, - .downspread_percent = 0.38, - .dcn_downspread_percent = 0.5, - .dram_clock_change_latency_us = 400, - .dispclk_dppclk_vco_speed_mhz = 4300.0, - .do_urgent_latency_adjustment = true, - .urgent_latency_adjustment_fabric_clock_component_us = 1.0, - .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000, -}; - enum dcn321_clk_src_array_id { 
DCN321_CLK_SRC_PLL0, DCN321_CLK_SRC_PLL1, @@ -1719,76 +1593,6 @@ static void dcn321_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts, (dcn3_21_soc.return_bus_width_bytes * (dcn3_21_soc.max_avg_sdp_bw_use_normal_percent / 100)); } -static void get_optimal_ntuple(struct _vcs_dpi_voltage_scaling_st *entry) -{ - if (entry->dcfclk_mhz > 0) { - float bw_on_sdp = entry->dcfclk_mhz * dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / 100); - - entry->fabricclk_mhz = bw_on_sdp / (dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_fabric_bw_after_urgent / 100)); - entry->dram_speed_mts = bw_on_sdp / (dcn3_21_soc.num_chans * - dcn3_21_soc.dram_channel_width_bytes * ((float)dcn3_21_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100)); - } else if (entry->fabricclk_mhz > 0) { - float bw_on_fabric = entry->fabricclk_mhz * dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_fabric_bw_after_urgent / 100); - - entry->dcfclk_mhz = bw_on_fabric / (dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / 100)); - entry->dram_speed_mts = bw_on_fabric / (dcn3_21_soc.num_chans * - dcn3_21_soc.dram_channel_width_bytes * ((float)dcn3_21_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100)); - } else if (entry->dram_speed_mts > 0) { - float bw_on_dram = entry->dram_speed_mts * dcn3_21_soc.num_chans * - dcn3_21_soc.dram_channel_width_bytes * ((float)dcn3_21_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100); - - entry->fabricclk_mhz = bw_on_dram / (dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_fabric_bw_after_urgent / 100)); - entry->dcfclk_mhz = bw_on_dram / (dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / 100)); - } -} - -static float calculate_net_bw_in_kbytes_sec(struct _vcs_dpi_voltage_scaling_st *entry) -{ - float memory_bw_kbytes_sec = entry->dram_speed_mts * dcn3_21_soc.num_chans * - 
dcn3_21_soc.dram_channel_width_bytes * ((float)dcn3_21_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100); - - float fabric_bw_kbytes_sec = entry->fabricclk_mhz * dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_fabric_bw_after_urgent / 100); - - float sdp_bw_kbytes_sec = entry->dcfclk_mhz * dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / 100); - - float limiting_bw_kbytes_sec = memory_bw_kbytes_sec; - - if (fabric_bw_kbytes_sec < limiting_bw_kbytes_sec) - limiting_bw_kbytes_sec = fabric_bw_kbytes_sec; - - if (sdp_bw_kbytes_sec < limiting_bw_kbytes_sec) - limiting_bw_kbytes_sec = sdp_bw_kbytes_sec; - - return limiting_bw_kbytes_sec; -} - -static void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries, - struct _vcs_dpi_voltage_scaling_st *entry) -{ - int index = 0; - int i = 0; - float net_bw_of_new_state = 0; - - if (*num_entries == 0) { - table[0] = *entry; - (*num_entries)++; - } else { - net_bw_of_new_state = calculate_net_bw_in_kbytes_sec(entry); - while (net_bw_of_new_state > calculate_net_bw_in_kbytes_sec(&table[index])) { - index++; - if (index >= *num_entries) - break; - } - - for (i = *num_entries; i > index; i--) { - table[i] = table[i - 1]; - } - - table[index] = *entry; - (*num_entries)++; - } -} - static void remove_entry_from_table_at_index(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries, unsigned int index) { @@ -1872,8 +1676,9 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, entry.fabricclk_mhz = 0; entry.dram_speed_mts = 0; - get_optimal_ntuple(&entry); - insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_START(); + dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_END(); } // Insert the max DCFCLK @@ -1881,8 +1686,9 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, entry.fabricclk_mhz = 0; entry.dram_speed_mts = 0; - 
get_optimal_ntuple(&entry); - insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_START(); + dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_END(); // Insert the UCLK DPMS for (i = 0; i < num_uclk_dpms; i++) { @@ -1890,8 +1696,9 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, entry.fabricclk_mhz = 0; entry.dram_speed_mts = bw_params->clk_table.entries[i].memclk_mhz * 16; - get_optimal_ntuple(&entry); - insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_START(); + dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_END(); } // If FCLK is coarse grained, insert individual DPMs. @@ -1901,8 +1708,9 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, entry.fabricclk_mhz = bw_params->clk_table.entries[i].fclk_mhz; entry.dram_speed_mts = 0; - get_optimal_ntuple(&entry); - insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_START(); + dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_END(); } } // If FCLK fine grained, only insert max @@ -1911,8 +1719,9 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, entry.fabricclk_mhz = max_fclk_mhz; entry.dram_speed_mts = 0; - get_optimal_ntuple(&entry); - insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_START(); + dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_END(); } // At this point, the table contains all "points of interest" based on diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.h b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.h index 2732085a0e88..82cbf009f2d3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.h @@ -31,6 +31,9 @@ #define TO_DCN321_RES_POOL(pool)\ container_of(pool, struct dcn321_resource_pool, base) +extern struct _vcs_dpi_ip_params_st dcn3_21_ip; +extern struct 
_vcs_dpi_soc_bounding_box_st dcn3_21_soc; + struct dcn321_resource_pool { struct resource_pool base; }; diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index 01cb0ef3a2b0..359f6e9a1da0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -76,6 +76,7 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/dcn32_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_32.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_rq_dlg_calc_32.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_util_32.o := $(dml_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml/dcn321/dcn321_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/dcn31_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn302/dcn302_fpu.o := $(dml_ccflags) @@ -126,6 +127,7 @@ DML += dcn31/display_mode_vba_31.o dcn31/display_rq_dlg_calc_31.o DML += dcn32/display_mode_vba_32.o dcn32/display_rq_dlg_calc_32.o dcn32/display_mode_vba_util_32.o DML += dcn31/dcn31_fpu.o DML += dcn32/dcn32_fpu.o +DML += dcn321/dcn321_fpu.o DML += dcn301/dcn301_fpu.o DML += dcn302/dcn302_fpu.o DML += dcn303/dcn303_fpu.o diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c new file mode 100644 index 000000000000..78408698985b --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c @@ -0,0 +1,238 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2022 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#include "resource.h" +#include "dcn321_fpu.h" +#include "dcn32/dcn32_resource.h" +#include "dcn321/dcn321_resource.h" + +#define DCN3_2_DEFAULT_DET_SIZE 256 + +struct _vcs_dpi_ip_params_st dcn3_21_ip = { + .gpuvm_enable = 0, + .gpuvm_max_page_table_levels = 4, + .hostvm_enable = 0, + .rob_buffer_size_kbytes = 128, + .det_buffer_size_kbytes = DCN3_2_DEFAULT_DET_SIZE, + .config_return_buffer_size_in_kbytes = 1280, + .compressed_buffer_segment_size_in_kbytes = 64, + .meta_fifo_size_in_kentries = 22, + .zero_size_buffer_entries = 512, + .compbuf_reserved_space_64b = 256, + .compbuf_reserved_space_zs = 64, + .dpp_output_buffer_pixels = 2560, + .opp_output_buffer_lines = 1, + .pixel_chunk_size_kbytes = 8, + .alpha_pixel_chunk_size_kbytes = 4, + .min_pixel_chunk_size_bytes = 1024, + .dcc_meta_buffer_size_bytes = 6272, + .meta_chunk_size_kbytes = 2, + .min_meta_chunk_size_bytes = 256, + .writeback_chunk_size_kbytes = 8, + .ptoi_supported = false, + .num_dsc = 4, + .maximum_dsc_bits_per_component = 12, + .maximum_pixels_per_line_per_dsc_unit = 6016, + .dsc422_native_support = true, + .is_line_buffer_bpp_fixed = true, + .line_buffer_fixed_bpp = 57, + .line_buffer_size_bits = 1171920, + .max_line_buffer_lines = 32, + .writeback_interface_buffer_size_kbytes = 90, + .max_num_dpp = 4, + .max_num_otg = 4, + .max_num_hdmi_frl_outputs = 1, + .max_num_wb = 1, + .max_dchub_pscl_bw_pix_per_clk = 4, + .max_pscl_lb_bw_pix_per_clk = 2, + .max_lb_vscl_bw_pix_per_clk = 4, + .max_vscl_hscl_bw_pix_per_clk = 4, + .max_hscl_ratio = 6, + .max_vscl_ratio = 6, + .max_hscl_taps = 8, + .max_vscl_taps = 8, + .dpte_buffer_size_in_pte_reqs_luma = 64, + .dpte_buffer_size_in_pte_reqs_chroma = 34, + .dispclk_ramp_margin_percent = 1, + .max_inter_dcn_tile_repeaters = 8, + .cursor_buffer_size = 16, + .cursor_chunk_size = 2, + .writeback_line_buffer_buffer_size = 0, + .writeback_min_hscl_ratio = 1, + .writeback_min_vscl_ratio = 1, + .writeback_max_hscl_ratio = 1, + 
.writeback_max_vscl_ratio = 1, + .writeback_max_hscl_taps = 1, + .writeback_max_vscl_taps = 1, + .dppclk_delay_subtotal = 47, + .dppclk_delay_scl = 50, + .dppclk_delay_scl_lb_only = 16, + .dppclk_delay_cnvc_formatter = 28, + .dppclk_delay_cnvc_cursor = 6, + .dispclk_delay_subtotal = 125, + .dynamic_metadata_vm_enabled = false, + .odm_combine_4to1_supported = false, + .dcc_supported = true, + .max_num_dp2p0_outputs = 2, + .max_num_dp2p0_streams = 4, +}; + +struct _vcs_dpi_soc_bounding_box_st dcn3_21_soc = { + .clock_limits = { + { + .state = 0, + .dcfclk_mhz = 1564.0, + .fabricclk_mhz = 400.0, + .dispclk_mhz = 2150.0, + .dppclk_mhz = 2150.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .phyclk_d32_mhz = 625.0, + .socclk_mhz = 1200.0, + .dscclk_mhz = 716.667, + .dram_speed_mts = 1600.0, + .dtbclk_mhz = 1564.0, + }, + }, + .num_states = 1, + .sr_exit_time_us = 5.20, + .sr_enter_plus_exit_time_us = 9.60, + .sr_exit_z8_time_us = 285.0, + .sr_enter_plus_exit_z8_time_us = 320, + .writeback_latency_us = 12.0, + .round_trip_ping_latency_dcfclk_cycles = 263, + .urgent_latency_pixel_data_only_us = 4.0, + .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, + .urgent_latency_vm_data_only_us = 4.0, + .fclk_change_latency_us = 20, + .usr_retraining_latency_us = 2, + .smn_latency_us = 2, + .mall_allocated_for_dcn_mbytes = 64, + .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, + .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, + .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, + .pct_ideal_sdp_bw_after_urgent = 100.0, + .pct_ideal_fabric_bw_after_urgent = 67.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 20.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, // N/A, for now keep as is until DML implemented + .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, // N/A, for now keep as is until DML implemented + .pct_ideal_dram_bw_after_urgent_strobe = 67.0, + .max_avg_sdp_bw_use_normal_percent = 80.0, + 
.max_avg_fabric_bw_use_normal_percent = 60.0, + .max_avg_dram_bw_use_normal_strobe_percent = 50.0, + .max_avg_dram_bw_use_normal_percent = 15.0, + .num_chans = 8, + .dram_channel_width_bytes = 2, + .fabric_datapath_to_dcn_data_return_bytes = 64, + .return_bus_width_bytes = 64, + .downspread_percent = 0.38, + .dcn_downspread_percent = 0.5, + .dram_clock_change_latency_us = 400, + .dispclk_dppclk_vco_speed_mhz = 4300.0, + .do_urgent_latency_adjustment = true, + .urgent_latency_adjustment_fabric_clock_component_us = 1.0, + .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000, +}; + +static void get_optimal_ntuple(struct _vcs_dpi_voltage_scaling_st *entry) +{ + if (entry->dcfclk_mhz > 0) { + float bw_on_sdp = entry->dcfclk_mhz * dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / 100); + + entry->fabricclk_mhz = bw_on_sdp / (dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_fabric_bw_after_urgent / 100)); + entry->dram_speed_mts = bw_on_sdp / (dcn3_21_soc.num_chans * + dcn3_21_soc.dram_channel_width_bytes * ((float)dcn3_21_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100)); + } else if (entry->fabricclk_mhz > 0) { + float bw_on_fabric = entry->fabricclk_mhz * dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_fabric_bw_after_urgent / 100); + + entry->dcfclk_mhz = bw_on_fabric / (dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / 100)); + entry->dram_speed_mts = bw_on_fabric / (dcn3_21_soc.num_chans * + dcn3_21_soc.dram_channel_width_bytes * ((float)dcn3_21_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100)); + } else if (entry->dram_speed_mts > 0) { + float bw_on_dram = entry->dram_speed_mts * dcn3_21_soc.num_chans * + dcn3_21_soc.dram_channel_width_bytes * ((float)dcn3_21_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100); + + entry->fabricclk_mhz = bw_on_dram / (dcn3_21_soc.return_bus_width_bytes * 
((float)dcn3_21_soc.pct_ideal_fabric_bw_after_urgent / 100)); + entry->dcfclk_mhz = bw_on_dram / (dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / 100)); + } +} + +static float calculate_net_bw_in_kbytes_sec(struct _vcs_dpi_voltage_scaling_st *entry) +{ + float memory_bw_kbytes_sec; + float fabric_bw_kbytes_sec; + float sdp_bw_kbytes_sec; + float limiting_bw_kbytes_sec; + + memory_bw_kbytes_sec = entry->dram_speed_mts * dcn3_21_soc.num_chans * + dcn3_21_soc.dram_channel_width_bytes * ((float)dcn3_21_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100); + + fabric_bw_kbytes_sec = entry->fabricclk_mhz * dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_fabric_bw_after_urgent / 100); + + sdp_bw_kbytes_sec = entry->dcfclk_mhz * dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / 100); + + limiting_bw_kbytes_sec = memory_bw_kbytes_sec; + + if (fabric_bw_kbytes_sec < limiting_bw_kbytes_sec) + limiting_bw_kbytes_sec = fabric_bw_kbytes_sec; + + if (sdp_bw_kbytes_sec < limiting_bw_kbytes_sec) + limiting_bw_kbytes_sec = sdp_bw_kbytes_sec; + + return limiting_bw_kbytes_sec; +} + +void dcn321_insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table, + unsigned int *num_entries, + struct _vcs_dpi_voltage_scaling_st *entry) +{ + int i = 0; + int index = 0; + float net_bw_of_new_state = 0; + + dc_assert_fp_enabled(); + + get_optimal_ntuple(entry); + + if (*num_entries == 0) { + table[0] = *entry; + (*num_entries)++; + } else { + net_bw_of_new_state = calculate_net_bw_in_kbytes_sec(entry); + while (net_bw_of_new_state > calculate_net_bw_in_kbytes_sec(&table[index])) { + index++; + if (index >= *num_entries) + break; + } + + for (i = *num_entries; i > index; i--) + table[i] = table[i - 1]; + + table[index] = *entry; + (*num_entries)++; + } +} + diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h 
new file mode 100644 index 000000000000..5b6b28526e18 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DCN321_FPU_H__ +#define __DCN321_FPU_H__ + +#include "dml/display_mode_vba.h" + +void dcn321_insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table, + unsigned int *num_entries, + struct _vcs_dpi_voltage_scaling_st *entry); + +#endif -- cgit v1.2.3 From 352b25a770712a954b1def1b8c6429b0825ee0b3 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Fri, 8 Jul 2022 16:07:03 -0400 Subject: drm/amd/display: Drop FPU code from dcn321 resource This commit fully moves the missing FPU operations from dcn321 resource to dcn321 fpu. It also removes those FPU flags from the Makefile.
Tested-by: Daniel Wheeler Reviewed-by: Harry Wentland Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn321/Makefile | 25 -- .../drm/amd/display/dc/dcn321/dcn321_resource.c | 452 +-------------------- .../gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c | 446 ++++++++++++++++++++ .../gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h | 2 + 4 files changed, 450 insertions(+), 475 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/Makefile b/drivers/gpu/drm/amd/display/dc/dcn321/Makefile index e554fd6c16f2..0a199c83bb5b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn321/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn321/Makefile @@ -12,31 +12,6 @@ DCN321 = dcn321_resource.o dcn321_dio_link_encoder.o -ifdef CONFIG_X86 -CFLAGS_$(AMDDALPATH)/dc/dcn321/dcn321_resource.o := -mhard-float -msse -endif - -ifdef CONFIG_PPC64 -CFLAGS_$(AMDDALPATH)/dc/dcn321/dcn321_resource.o := -mhard-float -maltivec -endif - -ifdef CONFIG_CC_IS_GCC -ifeq ($(call cc-ifversion, -lt, 0701, y), y) -IS_OLD_GCC = 1 -endif -endif - -ifdef CONFIG_X86 -ifdef IS_OLD_GCC -# Stack alignment mismatch, proceed with caution. -# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 -# (8B stack alignment). 
-CFLAGS_$(AMDDALPATH)/dc/dcn321/dcn321_resource.o += -mpreferred-stack-boundary=4 -else -CFLAGS_$(AMDDALPATH)/dc/dcn321/dcn321_resource.o += -msse2 -endif -endif - AMD_DAL_DCN321 = $(addprefix $(AMDDALPATH)/dc/dcn321/,$(DCN321)) AMD_DISPLAY_FILES += $(AMD_DAL_DCN321) diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c index 6619bcb30de7..9ac0fcf79bed 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c @@ -1570,459 +1570,11 @@ static struct dc_cap_funcs cap_funcs = { .get_dcc_compression_cap = dcn20_get_dcc_compression_cap }; - -static void dcn321_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts, - unsigned int *optimal_dcfclk, - unsigned int *optimal_fclk) -{ - double bw_from_dram, bw_from_dram1, bw_from_dram2; - - bw_from_dram1 = uclk_mts * dcn3_21_soc.num_chans * - dcn3_21_soc.dram_channel_width_bytes * (dcn3_21_soc.max_avg_dram_bw_use_normal_percent / 100); - bw_from_dram2 = uclk_mts * dcn3_21_soc.num_chans * - dcn3_21_soc.dram_channel_width_bytes * (dcn3_21_soc.max_avg_sdp_bw_use_normal_percent / 100); - - bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? 
bw_from_dram1 : bw_from_dram2; - - if (optimal_fclk) - *optimal_fclk = bw_from_dram / - (dcn3_21_soc.fabric_datapath_to_dcn_data_return_bytes * (dcn3_21_soc.max_avg_sdp_bw_use_normal_percent / 100)); - - if (optimal_dcfclk) - *optimal_dcfclk = bw_from_dram / - (dcn3_21_soc.return_bus_width_bytes * (dcn3_21_soc.max_avg_sdp_bw_use_normal_percent / 100)); -} - -static void remove_entry_from_table_at_index(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries, - unsigned int index) -{ - int i; - - if (*num_entries == 0) - return; - - for (i = index; i < *num_entries - 1; i++) { - table[i] = table[i + 1]; - } - memset(&table[--(*num_entries)], 0, sizeof(struct _vcs_dpi_voltage_scaling_st)); -} - -static int build_synthetic_soc_states(struct clk_bw_params *bw_params, - struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries) +static void dcn321_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) { - int i, j; - struct _vcs_dpi_voltage_scaling_st entry = {0}; - - unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, - max_phyclk_mhz = 0, max_dtbclk_mhz = 0, max_fclk_mhz = 0, max_uclk_mhz = 0; - - unsigned int min_dcfclk_mhz = 199, min_fclk_mhz = 299; - - static const unsigned int num_dcfclk_stas = 5; - unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 1324, 1564}; - - unsigned int num_uclk_dpms = 0; - unsigned int num_fclk_dpms = 0; - unsigned int num_dcfclk_dpms = 0; - - for (i = 0; i < MAX_NUM_DPM_LVL; i++) { - if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz) - max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; - if (bw_params->clk_table.entries[i].fclk_mhz > max_fclk_mhz) - max_fclk_mhz = bw_params->clk_table.entries[i].fclk_mhz; - if (bw_params->clk_table.entries[i].memclk_mhz > max_uclk_mhz) - max_uclk_mhz = bw_params->clk_table.entries[i].memclk_mhz; - if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz) - max_dispclk_mhz = 
bw_params->clk_table.entries[i].dispclk_mhz; - if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz) - max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz; - if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz) - max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz; - if (bw_params->clk_table.entries[i].dtbclk_mhz > max_dtbclk_mhz) - max_dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; - - if (bw_params->clk_table.entries[i].memclk_mhz > 0) - num_uclk_dpms++; - if (bw_params->clk_table.entries[i].fclk_mhz > 0) - num_fclk_dpms++; - if (bw_params->clk_table.entries[i].dcfclk_mhz > 0) - num_dcfclk_dpms++; - } - - if (!max_dcfclk_mhz || !max_dispclk_mhz || !max_dtbclk_mhz) - return -1; - - if (max_dppclk_mhz == 0) - max_dppclk_mhz = max_dispclk_mhz; - - if (max_fclk_mhz == 0) - max_fclk_mhz = max_dcfclk_mhz * dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / dcn3_21_soc.pct_ideal_fabric_bw_after_urgent; - - if (max_phyclk_mhz == 0) - max_phyclk_mhz = dcn3_21_soc.clock_limits[0].phyclk_mhz; - - *num_entries = 0; - entry.dispclk_mhz = max_dispclk_mhz; - entry.dscclk_mhz = max_dispclk_mhz / 3; - entry.dppclk_mhz = max_dppclk_mhz; - entry.dtbclk_mhz = max_dtbclk_mhz; - entry.phyclk_mhz = max_phyclk_mhz; - entry.phyclk_d18_mhz = dcn3_21_soc.clock_limits[0].phyclk_d18_mhz; - entry.phyclk_d32_mhz = dcn3_21_soc.clock_limits[0].phyclk_d32_mhz; - - // Insert all the DCFCLK STAs - for (i = 0; i < num_dcfclk_stas; i++) { - entry.dcfclk_mhz = dcfclk_sta_targets[i]; - entry.fabricclk_mhz = 0; - entry.dram_speed_mts = 0; - - DC_FP_START(); - dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); - DC_FP_END(); - } - - // Insert the max DCFCLK - entry.dcfclk_mhz = max_dcfclk_mhz; - entry.fabricclk_mhz = 0; - entry.dram_speed_mts = 0; - DC_FP_START(); - dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); + dcn321_update_bw_bounding_box_fpu(dc, bw_params); DC_FP_END(); - - // Insert the UCLK DPMS - for (i = 0; i < 
num_uclk_dpms; i++) { - entry.dcfclk_mhz = 0; - entry.fabricclk_mhz = 0; - entry.dram_speed_mts = bw_params->clk_table.entries[i].memclk_mhz * 16; - - DC_FP_START(); - dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); - DC_FP_END(); - } - - // If FCLK is coarse grained, insert individual DPMs. - if (num_fclk_dpms > 2) { - for (i = 0; i < num_fclk_dpms; i++) { - entry.dcfclk_mhz = 0; - entry.fabricclk_mhz = bw_params->clk_table.entries[i].fclk_mhz; - entry.dram_speed_mts = 0; - - DC_FP_START(); - dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); - DC_FP_END(); - } - } - // If FCLK fine grained, only insert max - else { - entry.dcfclk_mhz = 0; - entry.fabricclk_mhz = max_fclk_mhz; - entry.dram_speed_mts = 0; - - DC_FP_START(); - dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); - DC_FP_END(); - } - - // At this point, the table contains all "points of interest" based on - // DPMs from PMFW, and STAs. Table is sorted by BW, and all clock - // ratios (by derate, are exact). - - // Remove states that require higher clocks than are supported - for (i = *num_entries - 1; i >= 0 ; i--) { - if (table[i].dcfclk_mhz > max_dcfclk_mhz || - table[i].fabricclk_mhz > max_fclk_mhz || - table[i].dram_speed_mts > max_uclk_mhz * 16) - remove_entry_from_table_at_index(table, num_entries, i); - } - - // At this point, the table only contains supported points of interest - // it could be used as is, but some states may be redundant due to - // coarse grained nature of some clocks, so we want to round up to - // coarse grained DPMs and remove duplicates. 
- - // Round up UCLKs - for (i = *num_entries - 1; i >= 0 ; i--) { - for (j = 0; j < num_uclk_dpms; j++) { - if (bw_params->clk_table.entries[j].memclk_mhz * 16 >= table[i].dram_speed_mts) { - table[i].dram_speed_mts = bw_params->clk_table.entries[j].memclk_mhz * 16; - break; - } - } - } - - // If FCLK is coarse grained, round up to next DPMs - if (num_fclk_dpms > 2) { - for (i = *num_entries - 1; i >= 0 ; i--) { - for (j = 0; j < num_fclk_dpms; j++) { - if (bw_params->clk_table.entries[j].fclk_mhz >= table[i].fabricclk_mhz) { - table[i].fabricclk_mhz = bw_params->clk_table.entries[j].fclk_mhz; - break; - } - } - } - } - // Otherwise, round up to minimum. - else { - for (i = *num_entries - 1; i >= 0 ; i--) { - if (table[i].fabricclk_mhz < min_fclk_mhz) { - table[i].fabricclk_mhz = min_fclk_mhz; - break; - } - } - } - - // Round DCFCLKs up to minimum - for (i = *num_entries - 1; i >= 0 ; i--) { - if (table[i].dcfclk_mhz < min_dcfclk_mhz) { - table[i].dcfclk_mhz = min_dcfclk_mhz; - break; - } - } - - // Remove duplicate states, note duplicate states are always neighbouring since table is sorted. 
- i = 0; - while (i < *num_entries - 1) { - if (table[i].dcfclk_mhz == table[i + 1].dcfclk_mhz && - table[i].fabricclk_mhz == table[i + 1].fabricclk_mhz && - table[i].dram_speed_mts == table[i + 1].dram_speed_mts) - remove_entry_from_table_at_index(table, num_entries, i + 1); - else - i++; - } - - // Fix up the state indicies - for (i = *num_entries - 1; i >= 0 ; i--) { - table[i].state = i; - } - - return 0; -} - -/* dcn321_update_bw_bounding_box - * This would override some dcn3_2 ip_or_soc initial parameters hardcoded from spreadsheet - * with actual values as per dGPU SKU: - * -with passed few options from dc->config - * -with dentist_vco_frequency from Clk Mgr (currently hardcoded, but might need to get it from PM FW) - * -with passed latency values (passed in ns units) in dc-> bb override for debugging purposes - * -with passed latencies from VBIOS (in 100_ns units) if available for certain dGPU SKU - * -with number of DRAM channels from VBIOS (which differ for certain dGPU SKU of the same ASIC) - * -clocks levels with passed clk_table entries from Clk Mgr as reported by PM FW for different - * clocks (which might differ for certain dGPU SKU of the same ASIC) - */ -static void dcn321_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) -{ - if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) { - /* Overrides from dc->config options */ - dcn3_21_ip.clamp_min_dcfclk = dc->config.clamp_min_dcfclk; - - /* Override from passed dc->bb_overrides if available*/ - if ((int)(dcn3_21_soc.sr_exit_time_us * 1000) != dc->bb_overrides.sr_exit_time_ns - && dc->bb_overrides.sr_exit_time_ns) { - dcn3_21_soc.sr_exit_time_us = dc->bb_overrides.sr_exit_time_ns / 1000.0; - } - - if ((int)(dcn3_21_soc.sr_enter_plus_exit_time_us * 1000) - != dc->bb_overrides.sr_enter_plus_exit_time_ns - && dc->bb_overrides.sr_enter_plus_exit_time_ns) { - dcn3_21_soc.sr_enter_plus_exit_time_us = - dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0; - } - - if 
((int)(dcn3_21_soc.urgent_latency_us * 1000) != dc->bb_overrides.urgent_latency_ns - && dc->bb_overrides.urgent_latency_ns) { - dcn3_21_soc.urgent_latency_us = dc->bb_overrides.urgent_latency_ns / 1000.0; - } - - if ((int)(dcn3_21_soc.dram_clock_change_latency_us * 1000) - != dc->bb_overrides.dram_clock_change_latency_ns - && dc->bb_overrides.dram_clock_change_latency_ns) { - dcn3_21_soc.dram_clock_change_latency_us = - dc->bb_overrides.dram_clock_change_latency_ns / 1000.0; - } - - if ((int)(dcn3_21_soc.dummy_pstate_latency_us * 1000) - != dc->bb_overrides.dummy_clock_change_latency_ns - && dc->bb_overrides.dummy_clock_change_latency_ns) { - dcn3_21_soc.dummy_pstate_latency_us = - dc->bb_overrides.dummy_clock_change_latency_ns / 1000.0; - } - - /* Override from VBIOS if VBIOS bb_info available */ - if (dc->ctx->dc_bios->funcs->get_soc_bb_info) { - struct bp_soc_bb_info bb_info = {0}; - - if (dc->ctx->dc_bios->funcs->get_soc_bb_info(dc->ctx->dc_bios, &bb_info) == BP_RESULT_OK) { - if (bb_info.dram_clock_change_latency_100ns > 0) - dcn3_21_soc.dram_clock_change_latency_us = bb_info.dram_clock_change_latency_100ns * 10; - - if (bb_info.dram_sr_enter_exit_latency_100ns > 0) - dcn3_21_soc.sr_enter_plus_exit_time_us = bb_info.dram_sr_enter_exit_latency_100ns * 10; - - if (bb_info.dram_sr_exit_latency_100ns > 0) - dcn3_21_soc.sr_exit_time_us = bb_info.dram_sr_exit_latency_100ns * 10; - } - } - - /* Override from VBIOS for num_chan */ - if (dc->ctx->dc_bios->vram_info.num_chans) - dcn3_21_soc.num_chans = dc->ctx->dc_bios->vram_info.num_chans; - - if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes) - dcn3_21_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes; - - } - - /* Override dispclk_dppclk_vco_speed_mhz from Clk Mgr */ - dcn3_21_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; - dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; - - /* Overrides Clock levelsfrom 
CLK Mgr table entries as reported by PM FW */ - if ((!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) && (bw_params->clk_table.entries[0].memclk_mhz)) { - if (dc->debug.use_legacy_soc_bb_mechanism) { - unsigned int i = 0, j = 0, num_states = 0; - - unsigned int dcfclk_mhz[DC__VOLTAGE_STATES] = {0}; - unsigned int dram_speed_mts[DC__VOLTAGE_STATES] = {0}; - unsigned int optimal_uclk_for_dcfclk_sta_targets[DC__VOLTAGE_STATES] = {0}; - unsigned int optimal_dcfclk_for_uclk[DC__VOLTAGE_STATES] = {0}; - - unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {615, 906, 1324, 1564}; - unsigned int num_dcfclk_sta_targets = 4, num_uclk_states = 0; - unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0; - - for (i = 0; i < MAX_NUM_DPM_LVL; i++) { - if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz) - max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; - if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz) - max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz; - if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz) - max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz; - if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz) - max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz; - } - if (!max_dcfclk_mhz) - max_dcfclk_mhz = dcn3_21_soc.clock_limits[0].dcfclk_mhz; - if (!max_dispclk_mhz) - max_dispclk_mhz = dcn3_21_soc.clock_limits[0].dispclk_mhz; - if (!max_dppclk_mhz) - max_dppclk_mhz = dcn3_21_soc.clock_limits[0].dppclk_mhz; - if (!max_phyclk_mhz) - max_phyclk_mhz = dcn3_21_soc.clock_limits[0].phyclk_mhz; - - if (max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { - // If max DCFCLK is greater than the max DCFCLK STA target, insert into the DCFCLK STA target array - dcfclk_sta_targets[num_dcfclk_sta_targets] = max_dcfclk_mhz; - num_dcfclk_sta_targets++; - } else if (max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { - // If max 
DCFCLK is less than the max DCFCLK STA target, cap values and remove duplicates - for (i = 0; i < num_dcfclk_sta_targets; i++) { - if (dcfclk_sta_targets[i] > max_dcfclk_mhz) { - dcfclk_sta_targets[i] = max_dcfclk_mhz; - break; - } - } - // Update size of array since we "removed" duplicates - num_dcfclk_sta_targets = i + 1; - } - - num_uclk_states = bw_params->clk_table.num_entries; - - // Calculate optimal dcfclk for each uclk - for (i = 0; i < num_uclk_states; i++) { - dcn321_get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16, - &optimal_dcfclk_for_uclk[i], NULL); - if (optimal_dcfclk_for_uclk[i] < bw_params->clk_table.entries[0].dcfclk_mhz) { - optimal_dcfclk_for_uclk[i] = bw_params->clk_table.entries[0].dcfclk_mhz; - } - } - - // Calculate optimal uclk for each dcfclk sta target - for (i = 0; i < num_dcfclk_sta_targets; i++) { - for (j = 0; j < num_uclk_states; j++) { - if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) { - optimal_uclk_for_dcfclk_sta_targets[i] = - bw_params->clk_table.entries[j].memclk_mhz * 16; - break; - } - } - } - - i = 0; - j = 0; - // create the final dcfclk and uclk table - while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) { - if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j] && i < num_dcfclk_sta_targets) { - dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; - dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; - } else { - if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) { - dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j]; - dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; - } else { - j = num_uclk_states; - } - } - } - - while (i < num_dcfclk_sta_targets && num_states < DC__VOLTAGE_STATES) { - dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; - dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; - } - - while (j < num_uclk_states && num_states < 
DC__VOLTAGE_STATES && - optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) { - dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j]; - dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; - } - - dcn3_21_soc.num_states = num_states; - for (i = 0; i < dcn3_21_soc.num_states; i++) { - dcn3_21_soc.clock_limits[i].state = i; - dcn3_21_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i]; - dcn3_21_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i]; - - /* Fill all states with max values of all these clocks */ - dcn3_21_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz; - dcn3_21_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz; - dcn3_21_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz; - dcn3_21_soc.clock_limits[i].dscclk_mhz = max_dispclk_mhz / 3; - - /* Populate from bw_params for DTBCLK, SOCCLK */ - if (i > 0) { - if (!bw_params->clk_table.entries[i].dtbclk_mhz) { - dcn3_21_soc.clock_limits[i].dtbclk_mhz = dcn3_21_soc.clock_limits[i-1].dtbclk_mhz; - } else { - dcn3_21_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; - } - } else if (bw_params->clk_table.entries[i].dtbclk_mhz) { - dcn3_21_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; - } - - if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0) - dcn3_21_soc.clock_limits[i].socclk_mhz = dcn3_21_soc.clock_limits[i-1].socclk_mhz; - else - dcn3_21_soc.clock_limits[i].socclk_mhz = bw_params->clk_table.entries[i].socclk_mhz; - - if (!dram_speed_mts[i] && i > 0) - dcn3_21_soc.clock_limits[i].dram_speed_mts = dcn3_21_soc.clock_limits[i-1].dram_speed_mts; - else - dcn3_21_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i]; - - /* These clocks cannot come from bw_params, always fill from dcn3_21_soc[0] */ - /* PHYCLK_D18, PHYCLK_D32 */ - dcn3_21_soc.clock_limits[i].phyclk_d18_mhz = dcn3_21_soc.clock_limits[0].phyclk_d18_mhz; - dcn3_21_soc.clock_limits[i].phyclk_d32_mhz = dcn3_21_soc.clock_limits[0].phyclk_d32_mhz; - } - } else { - 
build_synthetic_soc_states(bw_params, dcn3_21_soc.clock_limits, &dcn3_21_soc.num_states); - } - - /* Re-init DML with updated bb */ - dml_init_instance(&dc->dml, &dcn3_21_soc, &dcn3_21_ip, DML_PROJECT_DCN32); - if (dc->current_state) - dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_21_soc, &dcn3_21_ip, DML_PROJECT_DCN32); - } } static struct resource_funcs dcn321_res_pool_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c index 78408698985b..6e72336b7975 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c @@ -24,6 +24,7 @@ * */ +#include "clk_mgr.h" #include "resource.h" #include "dcn321_fpu.h" #include "dcn32/dcn32_resource.h" @@ -236,3 +237,448 @@ void dcn321_insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *t } } +static void remove_entry_from_table_at_index(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries, + unsigned int index) +{ + int i; + + if (*num_entries == 0) + return; + + for (i = index; i < *num_entries - 1; i++) { + table[i] = table[i + 1]; + } + memset(&table[--(*num_entries)], 0, sizeof(struct _vcs_dpi_voltage_scaling_st)); +} + +static int build_synthetic_soc_states(struct clk_bw_params *bw_params, + struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries) +{ + int i, j; + struct _vcs_dpi_voltage_scaling_st entry = {0}; + + unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, + max_phyclk_mhz = 0, max_dtbclk_mhz = 0, max_fclk_mhz = 0, max_uclk_mhz = 0; + + unsigned int min_dcfclk_mhz = 199, min_fclk_mhz = 299; + + static const unsigned int num_dcfclk_stas = 5; + unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 1324, 1564}; + + unsigned int num_uclk_dpms = 0; + unsigned int num_fclk_dpms = 0; + unsigned int num_dcfclk_dpms = 0; + + for (i = 0; i < MAX_NUM_DPM_LVL; i++) { + if 
(bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz) + max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; + if (bw_params->clk_table.entries[i].fclk_mhz > max_fclk_mhz) + max_fclk_mhz = bw_params->clk_table.entries[i].fclk_mhz; + if (bw_params->clk_table.entries[i].memclk_mhz > max_uclk_mhz) + max_uclk_mhz = bw_params->clk_table.entries[i].memclk_mhz; + if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz) + max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz; + if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz) + max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz; + if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz) + max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz; + if (bw_params->clk_table.entries[i].dtbclk_mhz > max_dtbclk_mhz) + max_dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; + + if (bw_params->clk_table.entries[i].memclk_mhz > 0) + num_uclk_dpms++; + if (bw_params->clk_table.entries[i].fclk_mhz > 0) + num_fclk_dpms++; + if (bw_params->clk_table.entries[i].dcfclk_mhz > 0) + num_dcfclk_dpms++; + } + + if (!max_dcfclk_mhz || !max_dispclk_mhz || !max_dtbclk_mhz) + return -1; + + if (max_dppclk_mhz == 0) + max_dppclk_mhz = max_dispclk_mhz; + + if (max_fclk_mhz == 0) + max_fclk_mhz = max_dcfclk_mhz * dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / dcn3_21_soc.pct_ideal_fabric_bw_after_urgent; + + if (max_phyclk_mhz == 0) + max_phyclk_mhz = dcn3_21_soc.clock_limits[0].phyclk_mhz; + + *num_entries = 0; + entry.dispclk_mhz = max_dispclk_mhz; + entry.dscclk_mhz = max_dispclk_mhz / 3; + entry.dppclk_mhz = max_dppclk_mhz; + entry.dtbclk_mhz = max_dtbclk_mhz; + entry.phyclk_mhz = max_phyclk_mhz; + entry.phyclk_d18_mhz = dcn3_21_soc.clock_limits[0].phyclk_d18_mhz; + entry.phyclk_d32_mhz = dcn3_21_soc.clock_limits[0].phyclk_d32_mhz; + + // Insert all the DCFCLK STAs + for (i = 0; i < num_dcfclk_stas; i++) { + entry.dcfclk_mhz = dcfclk_sta_targets[i]; + entry.fabricclk_mhz = 
0; + entry.dram_speed_mts = 0; + + dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); + } + + // Insert the max DCFCLK + entry.dcfclk_mhz = max_dcfclk_mhz; + entry.fabricclk_mhz = 0; + entry.dram_speed_mts = 0; + + dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); + + // Insert the UCLK DPMS + for (i = 0; i < num_uclk_dpms; i++) { + entry.dcfclk_mhz = 0; + entry.fabricclk_mhz = 0; + entry.dram_speed_mts = bw_params->clk_table.entries[i].memclk_mhz * 16; + + dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); + } + + // If FCLK is coarse grained, insert individual DPMs. + if (num_fclk_dpms > 2) { + for (i = 0; i < num_fclk_dpms; i++) { + entry.dcfclk_mhz = 0; + entry.fabricclk_mhz = bw_params->clk_table.entries[i].fclk_mhz; + entry.dram_speed_mts = 0; + + dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); + } + } + // If FCLK fine grained, only insert max + else { + entry.dcfclk_mhz = 0; + entry.fabricclk_mhz = max_fclk_mhz; + entry.dram_speed_mts = 0; + + dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); + } + + // At this point, the table contains all "points of interest" based on + // DPMs from PMFW, and STAs. Table is sorted by BW, and all clock + // ratios (by derate, are exact). + + // Remove states that require higher clocks than are supported + for (i = *num_entries - 1; i >= 0 ; i--) { + if (table[i].dcfclk_mhz > max_dcfclk_mhz || + table[i].fabricclk_mhz > max_fclk_mhz || + table[i].dram_speed_mts > max_uclk_mhz * 16) + remove_entry_from_table_at_index(table, num_entries, i); + } + + // At this point, the table only contains supported points of interest + // it could be used as is, but some states may be redundant due to + // coarse grained nature of some clocks, so we want to round up to + // coarse grained DPMs and remove duplicates. 
+ + // Round up UCLKs + for (i = *num_entries - 1; i >= 0 ; i--) { + for (j = 0; j < num_uclk_dpms; j++) { + if (bw_params->clk_table.entries[j].memclk_mhz * 16 >= table[i].dram_speed_mts) { + table[i].dram_speed_mts = bw_params->clk_table.entries[j].memclk_mhz * 16; + break; + } + } + } + + // If FCLK is coarse grained, round up to next DPMs + if (num_fclk_dpms > 2) { + for (i = *num_entries - 1; i >= 0 ; i--) { + for (j = 0; j < num_fclk_dpms; j++) { + if (bw_params->clk_table.entries[j].fclk_mhz >= table[i].fabricclk_mhz) { + table[i].fabricclk_mhz = bw_params->clk_table.entries[j].fclk_mhz; + break; + } + } + } + } + // Otherwise, round up to minimum. + else { + for (i = *num_entries - 1; i >= 0 ; i--) { + if (table[i].fabricclk_mhz < min_fclk_mhz) { + table[i].fabricclk_mhz = min_fclk_mhz; + break; + } + } + } + + // Round DCFCLKs up to minimum + for (i = *num_entries - 1; i >= 0 ; i--) { + if (table[i].dcfclk_mhz < min_dcfclk_mhz) { + table[i].dcfclk_mhz = min_dcfclk_mhz; + break; + } + } + + // Remove duplicate states, note duplicate states are always neighbouring since table is sorted. 
+ i = 0; + while (i < *num_entries - 1) { + if (table[i].dcfclk_mhz == table[i + 1].dcfclk_mhz && + table[i].fabricclk_mhz == table[i + 1].fabricclk_mhz && + table[i].dram_speed_mts == table[i + 1].dram_speed_mts) + remove_entry_from_table_at_index(table, num_entries, i + 1); + else + i++; + } + + // Fix up the state indices + for (i = *num_entries - 1; i >= 0 ; i--) { + table[i].state = i; + } + + return 0; +} + +static void dcn321_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts, + unsigned int *optimal_dcfclk, + unsigned int *optimal_fclk) +{ + double bw_from_dram, bw_from_dram1, bw_from_dram2; + + bw_from_dram1 = uclk_mts * dcn3_21_soc.num_chans * + dcn3_21_soc.dram_channel_width_bytes * (dcn3_21_soc.max_avg_dram_bw_use_normal_percent / 100); + bw_from_dram2 = uclk_mts * dcn3_21_soc.num_chans * + dcn3_21_soc.dram_channel_width_bytes * (dcn3_21_soc.max_avg_sdp_bw_use_normal_percent / 100); + + bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? bw_from_dram1 : bw_from_dram2; + + if (optimal_fclk) + *optimal_fclk = bw_from_dram / + (dcn3_21_soc.fabric_datapath_to_dcn_data_return_bytes * (dcn3_21_soc.max_avg_sdp_bw_use_normal_percent / 100)); + + if (optimal_dcfclk) + *optimal_dcfclk = bw_from_dram / + (dcn3_21_soc.return_bus_width_bytes * (dcn3_21_soc.max_avg_sdp_bw_use_normal_percent / 100)); +} + +/** dcn321_update_bw_bounding_box_fpu + * This would override some dcn3_2 ip_or_soc initial parameters hardcoded from spreadsheet + * with actual values as per dGPU SKU: + * -with passed few options from dc->config + * -with dentist_vco_frequency from Clk Mgr (currently hardcoded, but might need to get it from PM FW) + * -with passed latency values (passed in ns units) in dc-> bb override for debugging purposes + * -with passed latencies from VBIOS (in 100_ns units) if available for certain dGPU SKU + * -with number of DRAM channels from VBIOS (which differ for certain dGPU SKU of the same ASIC) + * -clocks levels with passed clk_table entries from Clk Mgr as reported 
by PM FW for different + * clocks (which might differ for certain dGPU SKU of the same ASIC) + */ +void dcn321_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params) +{ + dc_assert_fp_enabled(); + if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) { + /* Overrides from dc->config options */ + dcn3_21_ip.clamp_min_dcfclk = dc->config.clamp_min_dcfclk; + + /* Override from passed dc->bb_overrides if available*/ + if ((int)(dcn3_21_soc.sr_exit_time_us * 1000) != dc->bb_overrides.sr_exit_time_ns + && dc->bb_overrides.sr_exit_time_ns) { + dcn3_21_soc.sr_exit_time_us = dc->bb_overrides.sr_exit_time_ns / 1000.0; + } + + if ((int)(dcn3_21_soc.sr_enter_plus_exit_time_us * 1000) + != dc->bb_overrides.sr_enter_plus_exit_time_ns + && dc->bb_overrides.sr_enter_plus_exit_time_ns) { + dcn3_21_soc.sr_enter_plus_exit_time_us = + dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0; + } + + if ((int)(dcn3_21_soc.urgent_latency_us * 1000) != dc->bb_overrides.urgent_latency_ns + && dc->bb_overrides.urgent_latency_ns) { + dcn3_21_soc.urgent_latency_us = dc->bb_overrides.urgent_latency_ns / 1000.0; + } + + if ((int)(dcn3_21_soc.dram_clock_change_latency_us * 1000) + != dc->bb_overrides.dram_clock_change_latency_ns + && dc->bb_overrides.dram_clock_change_latency_ns) { + dcn3_21_soc.dram_clock_change_latency_us = + dc->bb_overrides.dram_clock_change_latency_ns / 1000.0; + } + + if ((int)(dcn3_21_soc.dummy_pstate_latency_us * 1000) + != dc->bb_overrides.dummy_clock_change_latency_ns + && dc->bb_overrides.dummy_clock_change_latency_ns) { + dcn3_21_soc.dummy_pstate_latency_us = + dc->bb_overrides.dummy_clock_change_latency_ns / 1000.0; + } + + /* Override from VBIOS if VBIOS bb_info available */ + if (dc->ctx->dc_bios->funcs->get_soc_bb_info) { + struct bp_soc_bb_info bb_info = {0}; + + if (dc->ctx->dc_bios->funcs->get_soc_bb_info(dc->ctx->dc_bios, &bb_info) == BP_RESULT_OK) { + if (bb_info.dram_clock_change_latency_100ns > 0) + dcn3_21_soc.dram_clock_change_latency_us = 
bb_info.dram_clock_change_latency_100ns * 10; + + if (bb_info.dram_sr_enter_exit_latency_100ns > 0) + dcn3_21_soc.sr_enter_plus_exit_time_us = bb_info.dram_sr_enter_exit_latency_100ns * 10; + + if (bb_info.dram_sr_exit_latency_100ns > 0) + dcn3_21_soc.sr_exit_time_us = bb_info.dram_sr_exit_latency_100ns * 10; + } + } + + /* Override from VBIOS for num_chan */ + if (dc->ctx->dc_bios->vram_info.num_chans) + dcn3_21_soc.num_chans = dc->ctx->dc_bios->vram_info.num_chans; + + if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes) + dcn3_21_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes; + + } + + /* Override dispclk_dppclk_vco_speed_mhz from Clk Mgr */ + dcn3_21_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; + dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; + + /* Overrides Clock levelsfrom CLK Mgr table entries as reported by PM FW */ + if ((!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) && (bw_params->clk_table.entries[0].memclk_mhz)) { + if (dc->debug.use_legacy_soc_bb_mechanism) { + unsigned int i = 0, j = 0, num_states = 0; + + unsigned int dcfclk_mhz[DC__VOLTAGE_STATES] = {0}; + unsigned int dram_speed_mts[DC__VOLTAGE_STATES] = {0}; + unsigned int optimal_uclk_for_dcfclk_sta_targets[DC__VOLTAGE_STATES] = {0}; + unsigned int optimal_dcfclk_for_uclk[DC__VOLTAGE_STATES] = {0}; + + unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {615, 906, 1324, 1564}; + unsigned int num_dcfclk_sta_targets = 4, num_uclk_states = 0; + unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0; + + for (i = 0; i < MAX_NUM_DPM_LVL; i++) { + if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz) + max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; + if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz) + max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz; + if 
(bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz) + max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz; + if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz) + max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz; + } + if (!max_dcfclk_mhz) + max_dcfclk_mhz = dcn3_21_soc.clock_limits[0].dcfclk_mhz; + if (!max_dispclk_mhz) + max_dispclk_mhz = dcn3_21_soc.clock_limits[0].dispclk_mhz; + if (!max_dppclk_mhz) + max_dppclk_mhz = dcn3_21_soc.clock_limits[0].dppclk_mhz; + if (!max_phyclk_mhz) + max_phyclk_mhz = dcn3_21_soc.clock_limits[0].phyclk_mhz; + + if (max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { + // If max DCFCLK is greater than the max DCFCLK STA target, insert into the DCFCLK STA target array + dcfclk_sta_targets[num_dcfclk_sta_targets] = max_dcfclk_mhz; + num_dcfclk_sta_targets++; + } else if (max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { + // If max DCFCLK is less than the max DCFCLK STA target, cap values and remove duplicates + for (i = 0; i < num_dcfclk_sta_targets; i++) { + if (dcfclk_sta_targets[i] > max_dcfclk_mhz) { + dcfclk_sta_targets[i] = max_dcfclk_mhz; + break; + } + } + // Update size of array since we "removed" duplicates + num_dcfclk_sta_targets = i + 1; + } + + num_uclk_states = bw_params->clk_table.num_entries; + + // Calculate optimal dcfclk for each uclk + for (i = 0; i < num_uclk_states; i++) { + dcn321_get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16, + &optimal_dcfclk_for_uclk[i], NULL); + if (optimal_dcfclk_for_uclk[i] < bw_params->clk_table.entries[0].dcfclk_mhz) { + optimal_dcfclk_for_uclk[i] = bw_params->clk_table.entries[0].dcfclk_mhz; + } + } + + // Calculate optimal uclk for each dcfclk sta target + for (i = 0; i < num_dcfclk_sta_targets; i++) { + for (j = 0; j < num_uclk_states; j++) { + if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) { + optimal_uclk_for_dcfclk_sta_targets[i] = + 
bw_params->clk_table.entries[j].memclk_mhz * 16; + break; + } + } + } + + i = 0; + j = 0; + // create the final dcfclk and uclk table + while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) { + if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j] && i < num_dcfclk_sta_targets) { + dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; + dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; + } else { + if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) { + dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j]; + dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; + } else { + j = num_uclk_states; + } + } + } + + while (i < num_dcfclk_sta_targets && num_states < DC__VOLTAGE_STATES) { + dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; + dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; + } + + while (j < num_uclk_states && num_states < DC__VOLTAGE_STATES && + optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) { + dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j]; + dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; + } + + dcn3_21_soc.num_states = num_states; + for (i = 0; i < dcn3_21_soc.num_states; i++) { + dcn3_21_soc.clock_limits[i].state = i; + dcn3_21_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i]; + dcn3_21_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i]; + + /* Fill all states with max values of all these clocks */ + dcn3_21_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz; + dcn3_21_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz; + dcn3_21_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz; + dcn3_21_soc.clock_limits[i].dscclk_mhz = max_dispclk_mhz / 3; + + /* Populate from bw_params for DTBCLK, SOCCLK */ + if (i > 0) { + if (!bw_params->clk_table.entries[i].dtbclk_mhz) { + dcn3_21_soc.clock_limits[i].dtbclk_mhz = dcn3_21_soc.clock_limits[i-1].dtbclk_mhz; + } else { + dcn3_21_soc.clock_limits[i].dtbclk_mhz 
= bw_params->clk_table.entries[i].dtbclk_mhz; + } + } else if (bw_params->clk_table.entries[i].dtbclk_mhz) { + dcn3_21_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; + } + + if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0) + dcn3_21_soc.clock_limits[i].socclk_mhz = dcn3_21_soc.clock_limits[i-1].socclk_mhz; + else + dcn3_21_soc.clock_limits[i].socclk_mhz = bw_params->clk_table.entries[i].socclk_mhz; + + if (!dram_speed_mts[i] && i > 0) + dcn3_21_soc.clock_limits[i].dram_speed_mts = dcn3_21_soc.clock_limits[i-1].dram_speed_mts; + else + dcn3_21_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i]; + + /* These clocks cannot come from bw_params, always fill from dcn3_21_soc[0] */ + /* PHYCLK_D18, PHYCLK_D32 */ + dcn3_21_soc.clock_limits[i].phyclk_d18_mhz = dcn3_21_soc.clock_limits[0].phyclk_d18_mhz; + dcn3_21_soc.clock_limits[i].phyclk_d32_mhz = dcn3_21_soc.clock_limits[0].phyclk_d32_mhz; + } + } else { + build_synthetic_soc_states(bw_params, dcn3_21_soc.clock_limits, &dcn3_21_soc.num_states); + } + + /* Re-init DML with updated bb */ + dml_init_instance(&dc->dml, &dcn3_21_soc, &dcn3_21_ip, DML_PROJECT_DCN32); + if (dc->current_state) + dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_21_soc, &dcn3_21_ip, DML_PROJECT_DCN32); + } +} + diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h index 5b6b28526e18..e8fad9b4be69 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h @@ -33,4 +33,6 @@ void dcn321_insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *t unsigned int *num_entries, struct _vcs_dpi_voltage_scaling_st *entry); +void dcn321_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params); + #endif -- cgit v1.2.3 From 9a1dadb6d33173427b6f6c89ab1ddd0eca636afb Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Mon, 11 Jul 2022 00:43:58 -0400 
Subject: drm/amd/display: 3.2.195 This version brings along the following fixes: - Isolate FPU operation for DCN32/321 under the DML folder - Create a specific file for CRTC and plane based on amdgpu_dm - Fix DSC issues - Update DML logic Acked-by: Alan Liu Signed-off-by: Aric Cyr Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index be41f9fcf1dd..d05bbe193bfa 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -47,7 +47,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.194" +#define DC_VER "3.2.195" #define MAX_SURFACES 3 #define MAX_PLANES 6 -- cgit v1.2.3 From 39a6f3fe98ff03baab364acfec69a6f9803ce22c Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Wed, 20 Jul 2022 18:32:04 -0100 Subject: drm/amd/display: fix soft-fp vs hard-fp on DCN 3.1 family for powerpc Move remaining FPU code to DML folder that caused compilation error for powerpc. 
This patch depends on [1] to prevent the error below: /gcc-11.3.0-nolibc/powerpc64-linux/bin/powerpc64-linux-ld: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/display_mode_lib.o uses hard float, drivers/gpu/drm/amd/amdgpu/../display/dc/dcn31/dcn31_resource.o uses soft float /gcc-11.3.0-nolibc/powerpc64-linux/bin/powerpc64-linux-ld: failed to merge target specific data of file drivers/gpu/drm/amd/amdgpu/../display/dc/dcn31/dcn31_resource.o /gcc-11.3.0-nolibc/powerpc64-linux/bin/powerpc64-linux-ld: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/display_mode_lib.o uses hard float, drivers/gpu/drm/amd/amdgpu/../display/dc/dcn315/dcn315_resource.o uses soft float /gcc-11.3.0-nolibc/powerpc64-linux/bin/powerpc64-linux-ld: failed to merge target specific data of file drivers/gpu/drm/amd/amdgpu/../display/dc/dcn315/dcn315_resource.o /gcc-11.3.0-nolibc/powerpc64-linux/bin/powerpc64-linux-ld: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/display_mode_lib.o uses hard float, drivers/gpu/drm/amd/amdgpu/../display/dc/dcn316/dcn316_resource.o uses soft float /gcc-11.3.0-nolibc/powerpc64-linux/bin/powerpc64-linux-ld: failed to merge target specific data of file drivers/gpu/drm/amd/amdgpu/../display/dc/dcn316/dcn316_resource.o [1] https://lore.kernel.org/amd-gfx/20220716195144.342960-1-mwen@igalia.com/ Reported-by: Guenter Roeck Signed-off-by: Melissa Wen Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c | 5 +++-- drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c | 5 +++-- drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c | 5 +++-- drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c | 11 +++++++++++ drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h | 3 +++ 5 files changed, 23 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 178d40c0d70a..929b712cbada 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -1663,11 +1663,12 @@ int dcn31_populate_dml_pipes_from_context( pipes[pipe_cnt].pipe.src.immediate_flip = true; pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; pipes[pipe_cnt].pipe.src.gpuvm = true; - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0; - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0; pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; pipes[pipe_cnt].pipe.src.dcc_rate = 3; pipes[pipe_cnt].dout.dsc_input_bpc = 0; + DC_FP_START(); + dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt); + DC_FP_END(); if (dc->debug.dml_hostvm_override == DML_HOSTVM_NO_OVERRIDE) pipes[pipe_cnt].pipe.src.hostvm = dc->res_pool->hubbub->riommu_active; diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c index df2abd8fe2eb..1a5f5977f962 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c @@ -1658,11 +1658,12 @@ static int dcn315_populate_dml_pipes_from_context( pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; pipes[pipe_cnt].pipe.src.gpuvm = true; - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0; - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0; pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; pipes[pipe_cnt].pipe.src.dcc_rate = 3; pipes[pipe_cnt].dout.dsc_input_bpc = 0; + DC_FP_START(); + dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt); + DC_FP_END(); if (pipes[pipe_cnt].dout.dsc_enable) { switch (timing->display_color_depth) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c index 070fe10a004e..53dea466348f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c @@ -1661,11 +1661,12 @@ 
static int dcn316_populate_dml_pipes_from_context( pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; pipes[pipe_cnt].pipe.src.gpuvm = true; - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0; - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0; pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; pipes[pipe_cnt].pipe.src.dcc_rate = 3; pipes[pipe_cnt].dout.dsc_input_bpc = 0; + DC_FP_START(); + dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt); + DC_FP_END(); if (pipes[pipe_cnt].dout.dsc_enable) { switch (timing->display_color_depth) { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c index facac3daeaca..e36cfa5985ea 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c @@ -435,8 +435,19 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_16_soc = { .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, }; +void dcn31_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes, + int pipe_cnt) +{ + dc_assert_fp_enabled(); + + pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0; + pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0; +} + void dcn31_update_soc_for_wm_a(struct dc *dc, struct dc_state *context) { + dc_assert_fp_enabled(); + if (dc->clk_mgr->bw_params->wm_table.entries[WM_A].valid) { context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].pstate_latency_us; context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].sr_enter_plus_exit_time_us; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h index 0a10de80c1a4..4372f17b55d4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h @@ -31,6 +31,9 @@ #define DCN3_15_MIN_COMPBUF_SIZE_KB 128 #define 
DCN3_16_DEFAULT_DET_SIZE 192 +void dcn31_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes, + int pipe_cnt); + void dcn31_update_soc_for_wm_a(struct dc *dc, struct dc_state *context); void dcn31_calculate_wm_and_dlg_fp( -- cgit v1.2.3 From ca0273ab447853c014983951e2183991f8dbc7b9 Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Wed, 20 Jul 2022 18:32:05 -0100 Subject: drm/amd/display: remove useless FPU protection wrapper from dcn31_resource file Many lines of code in dcn31_resource_construct are wrapped by DC_FP macro to protect FPU operations; however, there is no FPU in this region. Therefore, just remove the wrapper for clarity. Signed-off-by: Melissa Wen Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 929b712cbada..6d25fcf865bf 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -1863,8 +1863,6 @@ static bool dcn31_resource_construct( struct dc_context *ctx = dc->ctx; struct irq_service_init_data init_data; - DC_FP_START(); - ctx->dc_bios->regs = &bios_regs; pool->base.res_cap = &res_cap_dcn31; @@ -2175,13 +2173,9 @@ static bool dcn31_resource_construct( dc->dcn_ip->max_num_dpp = dcn3_1_ip.max_num_dpp; - DC_FP_END(); - return true; create_fail: - - DC_FP_END(); dcn31_resource_destruct(pool); return false; -- cgit v1.2.3 From 1a3408259bfd5a383b3c7aa7c309c9fb81e35ce0 Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Wed, 20 Jul 2022 18:32:06 -0100 Subject: drm/amd/display: move FPU code on dcn21 clk_mgr The -mno-gnu-attribute option in dcn21 clk mgr makefile hides a soft vs hard fp error for powerpc. 
After removing this flag, we can see some FPU code remains there: /gcc-11.3.0-nolibc/powerpc64-linux/bin/powerpc64-linux-ld: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/display_mode_lib.o uses hard float, drivers/gpu/drm/amd/amdgpu/../display/dc/clk_mgr/dcn21/rn_clk_mgr.o uses soft float Therefore, remove the -mno-gnu-attribute flag for dcn21/powerpc and move FPU-associated code to DML folder. Signed-off-by: Melissa Wen Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile | 6 - .../drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c | 234 +------------------- .../drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h | 7 + .../gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c | 235 +++++++++++++++++++++ .../gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h | 2 + 5 files changed, 248 insertions(+), 236 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile index a48453612d10..66dc02c426e9 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile @@ -107,12 +107,6 @@ AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN201) ############################################################################### CLK_MGR_DCN21 = rn_clk_mgr.o rn_clk_mgr_vbios_smu.o -# prevent build errors regarding soft-float vs hard-float FP ABI tags -# this code is currently unused on ppc64, as it applies to Renoir APUs only -ifdef CONFIG_PPC64 -CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn21/rn_clk_mgr.o := $(call cc-option,-mno-gnu-attribute) -endif - AMD_DAL_CLK_MGR_DCN21 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn21/,$(CLK_MGR_DCN21)) AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN21) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c index cf1b5f354ae9..0202dc682682 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c +++ 
b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c @@ -26,10 +26,9 @@ #include "dccg.h" #include "clk_mgr_internal.h" - #include "dcn20/dcn20_clk_mgr.h" #include "rn_clk_mgr.h" - +#include "dml/dcn20/dcn20_fpu.h" #include "dce100/dce_clk_mgr.h" #include "rn_clk_mgr_vbios_smu.h" @@ -45,7 +44,6 @@ /* Constants */ -#define LPDDR_MEM_RETRAIN_LATENCY 4.977 /* Number obtained from LPDDR4 Training Counter Requirement doc */ #define SMU_VER_55_51_0 0x373300 /* SMU Version that is able to set DISPCLK below 100MHz */ /* Macros */ @@ -613,228 +611,6 @@ static struct clk_bw_params rn_bw_params = { }; -static struct wm_table ddr4_wm_table_gs = { - .entries = { - { - .wm_inst = WM_A, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 7.09, - .sr_enter_plus_exit_time_us = 8.14, - .valid = true, - }, - { - .wm_inst = WM_B, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 10.12, - .sr_enter_plus_exit_time_us = 11.48, - .valid = true, - }, - { - .wm_inst = WM_C, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 10.12, - .sr_enter_plus_exit_time_us = 11.48, - .valid = true, - }, - { - .wm_inst = WM_D, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 10.12, - .sr_enter_plus_exit_time_us = 11.48, - .valid = true, - }, - } -}; - -static struct wm_table lpddr4_wm_table_gs = { - .entries = { - { - .wm_inst = WM_A, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 5.32, - .sr_enter_plus_exit_time_us = 6.38, - .valid = true, - }, - { - .wm_inst = WM_B, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 9.82, - .sr_enter_plus_exit_time_us = 11.196, - .valid = true, - }, - { - .wm_inst = WM_C, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 9.89, - .sr_enter_plus_exit_time_us = 11.24, - .valid = true, - }, - { - .wm_inst = WM_D, - .wm_type 
= WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 9.748, - .sr_enter_plus_exit_time_us = 11.102, - .valid = true, - }, - } -}; - -static struct wm_table lpddr4_wm_table_with_disabled_ppt = { - .entries = { - { - .wm_inst = WM_A, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 8.32, - .sr_enter_plus_exit_time_us = 9.38, - .valid = true, - }, - { - .wm_inst = WM_B, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 9.82, - .sr_enter_plus_exit_time_us = 11.196, - .valid = true, - }, - { - .wm_inst = WM_C, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 9.89, - .sr_enter_plus_exit_time_us = 11.24, - .valid = true, - }, - { - .wm_inst = WM_D, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 9.748, - .sr_enter_plus_exit_time_us = 11.102, - .valid = true, - }, - } -}; - -static struct wm_table ddr4_wm_table_rn = { - .entries = { - { - .wm_inst = WM_A, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 11.90, - .sr_enter_plus_exit_time_us = 12.80, - .valid = true, - }, - { - .wm_inst = WM_B, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 13.18, - .sr_enter_plus_exit_time_us = 14.30, - .valid = true, - }, - { - .wm_inst = WM_C, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 13.18, - .sr_enter_plus_exit_time_us = 14.30, - .valid = true, - }, - { - .wm_inst = WM_D, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 13.18, - .sr_enter_plus_exit_time_us = 14.30, - .valid = true, - }, - } -}; - -static struct wm_table ddr4_1R_wm_table_rn = { - .entries = { - { - .wm_inst = WM_A, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 13.90, - .sr_enter_plus_exit_time_us = 14.80, - .valid = true, - }, - { - .wm_inst = WM_B, - .wm_type = 
WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 13.90, - .sr_enter_plus_exit_time_us = 14.80, - .valid = true, - }, - { - .wm_inst = WM_C, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 13.90, - .sr_enter_plus_exit_time_us = 14.80, - .valid = true, - }, - { - .wm_inst = WM_D, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 13.90, - .sr_enter_plus_exit_time_us = 14.80, - .valid = true, - }, - } -}; - -static struct wm_table lpddr4_wm_table_rn = { - .entries = { - { - .wm_inst = WM_A, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 7.32, - .sr_enter_plus_exit_time_us = 8.38, - .valid = true, - }, - { - .wm_inst = WM_B, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 9.82, - .sr_enter_plus_exit_time_us = 11.196, - .valid = true, - }, - { - .wm_inst = WM_C, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 9.89, - .sr_enter_plus_exit_time_us = 11.24, - .valid = true, - }, - { - .wm_inst = WM_D, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 9.748, - .sr_enter_plus_exit_time_us = 11.102, - .valid = true, - }, - } -}; - static unsigned int find_socclk_for_voltage(struct dpm_clocks *clock_table, unsigned int voltage) { int i; @@ -914,12 +690,10 @@ static void rn_clk_mgr_helper_populate_bw_params(struct clk_bw_params *bw_params /* * WM set D will be re-purposed for memory retraining */ - bw_params->wm_table.entries[WM_D].pstate_latency_us = LPDDR_MEM_RETRAIN_LATENCY; - bw_params->wm_table.entries[WM_D].wm_inst = WM_D; - bw_params->wm_table.entries[WM_D].wm_type = WM_TYPE_RETRAINING; - bw_params->wm_table.entries[WM_D].valid = true; + DC_FP_START(); + dcn21_clk_mgr_set_bw_params_wm_table(bw_params); + DC_FP_END(); } - } void rn_clk_mgr_construct( diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h 
b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h index e4322fa5475b..2e088c5171b2 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h @@ -29,6 +29,13 @@ #include "clk_mgr.h" #include "dm_pp_smu.h" +extern struct wm_table ddr4_wm_table_gs; +extern struct wm_table lpddr4_wm_table_gs; +extern struct wm_table lpddr4_wm_table_with_disabled_ppt; +extern struct wm_table ddr4_wm_table_rn; +extern struct wm_table ddr4_1R_wm_table_rn; +extern struct wm_table lpddr4_wm_table_rn; + struct rn_clk_registers { uint32_t CLK1_CLK0_CURRENT_CNT; /* DPREFCLK */ }; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c index dc60b835e938..eeeae52fe6fc 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c @@ -42,6 +42,9 @@ #define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) #endif +/* Constant */ +#define LPDDR_MEM_RETRAIN_LATENCY 4.977 /* Number obtained from LPDDR4 Training Counter Requirement doc */ + /** * DOC: DCN2x FPU manipulation Overview * @@ -650,6 +653,228 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_1_soc = { .num_states = 8 }; +struct wm_table ddr4_wm_table_gs = { + .entries = { + { + .wm_inst = WM_A, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 7.09, + .sr_enter_plus_exit_time_us = 8.14, + .valid = true, + }, + { + .wm_inst = WM_B, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 10.12, + .sr_enter_plus_exit_time_us = 11.48, + .valid = true, + }, + { + .wm_inst = WM_C, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 10.12, + .sr_enter_plus_exit_time_us = 11.48, + .valid = true, + }, + { + .wm_inst = WM_D, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 10.12, + .sr_enter_plus_exit_time_us = 11.48, + .valid = true, 
+ }, + } +}; + +struct wm_table lpddr4_wm_table_gs = { + .entries = { + { + .wm_inst = WM_A, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 5.32, + .sr_enter_plus_exit_time_us = 6.38, + .valid = true, + }, + { + .wm_inst = WM_B, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 9.82, + .sr_enter_plus_exit_time_us = 11.196, + .valid = true, + }, + { + .wm_inst = WM_C, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 9.89, + .sr_enter_plus_exit_time_us = 11.24, + .valid = true, + }, + { + .wm_inst = WM_D, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 9.748, + .sr_enter_plus_exit_time_us = 11.102, + .valid = true, + }, + } +}; + +struct wm_table lpddr4_wm_table_with_disabled_ppt = { + .entries = { + { + .wm_inst = WM_A, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 8.32, + .sr_enter_plus_exit_time_us = 9.38, + .valid = true, + }, + { + .wm_inst = WM_B, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 9.82, + .sr_enter_plus_exit_time_us = 11.196, + .valid = true, + }, + { + .wm_inst = WM_C, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 9.89, + .sr_enter_plus_exit_time_us = 11.24, + .valid = true, + }, + { + .wm_inst = WM_D, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 9.748, + .sr_enter_plus_exit_time_us = 11.102, + .valid = true, + }, + } +}; + +struct wm_table ddr4_wm_table_rn = { + .entries = { + { + .wm_inst = WM_A, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 11.90, + .sr_enter_plus_exit_time_us = 12.80, + .valid = true, + }, + { + .wm_inst = WM_B, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 13.18, + .sr_enter_plus_exit_time_us = 14.30, + .valid = true, + }, + { + .wm_inst = 
WM_C, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 13.18, + .sr_enter_plus_exit_time_us = 14.30, + .valid = true, + }, + { + .wm_inst = WM_D, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 13.18, + .sr_enter_plus_exit_time_us = 14.30, + .valid = true, + }, + } +}; + +struct wm_table ddr4_1R_wm_table_rn = { + .entries = { + { + .wm_inst = WM_A, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 13.90, + .sr_enter_plus_exit_time_us = 14.80, + .valid = true, + }, + { + .wm_inst = WM_B, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 13.90, + .sr_enter_plus_exit_time_us = 14.80, + .valid = true, + }, + { + .wm_inst = WM_C, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 13.90, + .sr_enter_plus_exit_time_us = 14.80, + .valid = true, + }, + { + .wm_inst = WM_D, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 13.90, + .sr_enter_plus_exit_time_us = 14.80, + .valid = true, + }, + } +}; + +struct wm_table lpddr4_wm_table_rn = { + .entries = { + { + .wm_inst = WM_A, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 7.32, + .sr_enter_plus_exit_time_us = 8.38, + .valid = true, + }, + { + .wm_inst = WM_B, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 9.82, + .sr_enter_plus_exit_time_us = 11.196, + .valid = true, + }, + { + .wm_inst = WM_C, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 9.89, + .sr_enter_plus_exit_time_us = 11.24, + .valid = true, + }, + { + .wm_inst = WM_D, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 9.748, + .sr_enter_plus_exit_time_us = 11.102, + .valid = true, + }, + } +}; + void dcn20_populate_dml_writeback_from_context(struct dc *dc, struct resource_context *res_ctx, 
display_e2e_pipe_params_st *pipes) @@ -2068,3 +2293,13 @@ void dcn21_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params dml_init_instance(&dc->dml, &dcn2_1_soc, &dcn2_1_ip, DML_PROJECT_DCN21); } + +void dcn21_clk_mgr_set_bw_params_wm_table(struct clk_bw_params *bw_params) +{ + dc_assert_fp_enabled(); + + bw_params->wm_table.entries[WM_D].pstate_latency_us = LPDDR_MEM_RETRAIN_LATENCY; + bw_params->wm_table.entries[WM_D].wm_inst = WM_D; + bw_params->wm_table.entries[WM_D].wm_type = WM_TYPE_RETRAINING; + bw_params->wm_table.entries[WM_D].valid = true; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h index aa892193e485..a6e1ad0f38e9 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h @@ -82,4 +82,6 @@ bool dcn21_validate_bandwidth_fp(struct dc *dc, bool fast_validate); void dcn21_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params); +void dcn21_clk_mgr_set_bw_params_wm_table(struct clk_bw_params *bw_params); + #endif /* __DCN20_FPU_H__ */ -- cgit v1.2.3 From ddd0fa1f47edd794736e70d165341a5c4ff78e47 Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Wed, 20 Jul 2022 18:32:07 -0100 Subject: drm/amd/display: move FPU code from dcn30 clk mgr to DML folder The -mno-gnu-attribute option in clk mgr makefile for dcn30 hides a soft vs hard fp error for powerpc. After removing this flag, we can see some FPU code remains there: gcc-11.3.0-nolibc/powerpc64-linux/bin/powerpc64-linux-ld: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/display_mode_lib.o uses hard float, drivers/gpu/drm/amd/amdgpu/../display/dc/clk_mgr/dcn30/dcn30_clk_mgr.o uses soft float Therefore, remove the -mno-gnu-attribute flag for dcn30/powerpc and move FPU-associated code to DML folder. 
Signed-off-by: Melissa Wen Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile | 6 --- .../amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c | 63 ++-------------------- .../gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c | 63 +++++++++++++++++++++- .../gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h | 1 + 4 files changed, 68 insertions(+), 65 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile index 66dc02c426e9..15b660a951a5 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile @@ -115,12 +115,6 @@ AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN21) ############################################################################### CLK_MGR_DCN30 = dcn30_clk_mgr.o dcn30_clk_mgr_smu_msg.o -# prevent build errors regarding soft-float vs hard-float FP ABI tags -# this code is currently unused on ppc64, as it applies to VanGogh APUs only -ifdef CONFIG_PPC64 -CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn30/dcn30_clk_mgr.o := $(call cc-option,-mno-gnu-attribute) -endif - AMD_DAL_CLK_MGR_DCN30 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn30/,$(CLK_MGR_DCN30)) AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN30) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c index 914708cefc79..3ce0ee0d012f 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c @@ -29,6 +29,7 @@ #include "dcn20/dcn20_clk_mgr.h" #include "dce100/dce_clk_mgr.h" #include "dcn30/dcn30_clk_mgr.h" +#include "dml/dcn30/dcn30_fpu.h" #include "reg_helper.h" #include "core_types.h" #include "dm_helpers.h" @@ -97,65 +98,11 @@ static void dcn3_init_single_clock(struct clk_mgr_internal *clk_mgr, uint32_t cl } } -static noinline void dcn3_build_wm_range_table(struct clk_mgr_internal *clk_mgr) +static 
void dcn3_build_wm_range_table(struct clk_mgr_internal *clk_mgr) { - /* defaults */ - double pstate_latency_us = clk_mgr->base.ctx->dc->dml.soc.dram_clock_change_latency_us; - double sr_exit_time_us = clk_mgr->base.ctx->dc->dml.soc.sr_exit_time_us; - double sr_enter_plus_exit_time_us = clk_mgr->base.ctx->dc->dml.soc.sr_enter_plus_exit_time_us; - uint16_t min_uclk_mhz = clk_mgr->base.bw_params->clk_table.entries[0].memclk_mhz; - - /* Set A - Normal - default values*/ - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].valid = true; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us = pstate_latency_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us = sr_exit_time_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_dcfclk = 0; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_dcfclk = 0xFFFF; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_uclk = min_uclk_mhz; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_uclk = 0xFFFF; - - /* Set B - Performance - higher minimum clocks */ -// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].valid = true; -// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us = pstate_latency_us; -// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us = sr_exit_time_us; -// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; -// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE; -// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = TUNED VALUE; -// 
clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_dcfclk = 0xFFFF; -// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_uclk = TUNED VALUE; -// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_uclk = 0xFFFF; - - /* Set C - Dummy P-State - P-State latency set to "dummy p-state" value */ - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].valid = true; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 0; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us = sr_exit_time_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.wm_type = WATERMARKS_DUMMY_PSTATE; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_dcfclk = 0; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_dcfclk = 0xFFFF; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_uclk = min_uclk_mhz; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_uclk = 0xFFFF; - clk_mgr->base.bw_params->dummy_pstate_table[0].dram_speed_mts = 1600; - clk_mgr->base.bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 38; - clk_mgr->base.bw_params->dummy_pstate_table[1].dram_speed_mts = 8000; - clk_mgr->base.bw_params->dummy_pstate_table[1].dummy_pstate_latency_us = 9; - clk_mgr->base.bw_params->dummy_pstate_table[2].dram_speed_mts = 10000; - clk_mgr->base.bw_params->dummy_pstate_table[2].dummy_pstate_latency_us = 8; - clk_mgr->base.bw_params->dummy_pstate_table[3].dram_speed_mts = 16000; - clk_mgr->base.bw_params->dummy_pstate_table[3].dummy_pstate_latency_us = 5; - - /* Set D - MALL - SR enter and exit times adjusted for MALL */ - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].valid = true; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us = 
pstate_latency_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us = 2; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us = 4; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.wm_type = WATERMARKS_MALL; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_dcfclk = 0; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_dcfclk = 0xFFFF; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_uclk = min_uclk_mhz; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_uclk = 0xFFFF; + DC_FP_START(); + dcn3_fpu_build_wm_range_table(&clk_mgr->base); + DC_FP_END(); } void dcn3_init_clocks(struct clk_mgr *clk_mgr_base) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c index a8db1306750e..c00f759fdded 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c @@ -29,7 +29,7 @@ #include "dcn20/dcn20_resource.h" #include "dcn30/dcn30_resource.h" - +#include "clk_mgr/dcn30/dcn30_smu11_driver_if.h" #include "display_mode_vba_30.h" #include "dcn30_fpu.h" @@ -616,4 +616,65 @@ void dcn30_fpu_update_bw_bounding_box(struct dc *dc, } +void dcn3_fpu_build_wm_range_table(struct clk_mgr *base) +{ + /* defaults */ + double pstate_latency_us = base->ctx->dc->dml.soc.dram_clock_change_latency_us; + double sr_exit_time_us = base->ctx->dc->dml.soc.sr_exit_time_us; + double sr_enter_plus_exit_time_us = base->ctx->dc->dml.soc.sr_enter_plus_exit_time_us; + uint16_t min_uclk_mhz = base->bw_params->clk_table.entries[0].memclk_mhz; + dc_assert_fp_enabled(); + + /* Set A - Normal - default values*/ + base->bw_params->wm_table.nv_entries[WM_A].valid = true; + base->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us = pstate_latency_us; + 
base->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us = sr_exit_time_us; + base->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; + base->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE; + base->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_dcfclk = 0; + base->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_dcfclk = 0xFFFF; + base->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_uclk = min_uclk_mhz; + base->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_uclk = 0xFFFF; + + /* Set B - Performance - higher minimum clocks */ +// base->bw_params->wm_table.nv_entries[WM_B].valid = true; +// base->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us = pstate_latency_us; +// base->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us = sr_exit_time_us; +// base->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; +// base->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE; +// base->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = TUNED VALUE; +// base->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_dcfclk = 0xFFFF; +// base->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_uclk = TUNED VALUE; +// base->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_uclk = 0xFFFF; + + /* Set C - Dummy P-State - P-State latency set to "dummy p-state" value */ + base->bw_params->wm_table.nv_entries[WM_C].valid = true; + base->bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 0; + base->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us = sr_exit_time_us; + base->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; + base->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.wm_type = WATERMARKS_DUMMY_PSTATE; + 
base->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_dcfclk = 0; + base->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_dcfclk = 0xFFFF; + base->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_uclk = min_uclk_mhz; + base->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_uclk = 0xFFFF; + base->bw_params->dummy_pstate_table[0].dram_speed_mts = 1600; + base->bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 38; + base->bw_params->dummy_pstate_table[1].dram_speed_mts = 8000; + base->bw_params->dummy_pstate_table[1].dummy_pstate_latency_us = 9; + base->bw_params->dummy_pstate_table[2].dram_speed_mts = 10000; + base->bw_params->dummy_pstate_table[2].dummy_pstate_latency_us = 8; + base->bw_params->dummy_pstate_table[3].dram_speed_mts = 16000; + base->bw_params->dummy_pstate_table[3].dummy_pstate_latency_us = 5; + + /* Set D - MALL - SR enter and exit times adjusted for MALL */ + base->bw_params->wm_table.nv_entries[WM_D].valid = true; + base->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us = pstate_latency_us; + base->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us = 2; + base->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us = 4; + base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.wm_type = WATERMARKS_MALL; + base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_dcfclk = 0; + base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_dcfclk = 0xFFFF; + base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_uclk = min_uclk_mhz; + base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_uclk = 0xFFFF; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h index dedfe7b5f173..c2024052a497 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h @@ -63,5 +63,6 @@ void dcn30_fpu_update_bw_bounding_box(struct dc 
*dc, unsigned int *dcfclk_mhz, unsigned int *dram_speed_mts); +void dcn3_fpu_build_wm_range_table(struct clk_mgr *base); #endif /* __DCN30_FPU_H__*/ -- cgit v1.2.3 From 5085e0361f5a3675fc0d0919f7b69aded453ceb7 Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Wed, 20 Jul 2022 18:32:08 -0100 Subject: drm/amd/display: move FPU code from dcn301 clk mgr to DML folder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The -mno-gnu-attribute option in dcn301 clk mgr makefile hides a soft vs hard fp error for powerpc. After removing this flag, we can see some FPU code remains there: gcc-11.3.0-nolibc/powerpc64-linux/bin/powerpc64-linux-ld: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/display_mode_lib.o uses hard float, drivers/gpu/drm/amd/amdgpu/../display/dc/clk_mgr/dcn301/vg_clk_mgr.o uses soft float Therefore, remove the -mno-gnu-attribute flag for dcn301/powerpc and move FPU-associated code to DML folder. Signed-off-by: Melissa Wen Reviewed-by: Maíra Canal Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile | 6 -- .../drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c | 85 ++-------------------- .../drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.h | 3 + .../gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c | 74 +++++++++++++++++++ 4 files changed, 83 insertions(+), 85 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile index 15b660a951a5..271d8e573181 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile @@ -123,12 +123,6 @@ AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN30) ############################################################################### CLK_MGR_DCN301 = vg_clk_mgr.o dcn301_smu.o -# prevent build errors regarding soft-float vs hard-float FP ABI tags -# this code is currently unused on ppc64, as it applies to VanGogh APUs only -ifdef 
CONFIG_PPC64 -CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn301/vg_clk_mgr.o := $(call cc-option,-mno-gnu-attribute) -endif - AMD_DAL_CLK_MGR_DCN301 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn301/,$(CLK_MGR_DCN301)) AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN301) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c index f310b0d25a07..24715ca2fa94 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c @@ -32,6 +32,9 @@ // For dcn20_update_clocks_update_dpp_dto #include "dcn20/dcn20_clk_mgr.h" +// For DML FPU code +#include "dml/dcn20/dcn20_fpu.h" + #include "vg_clk_mgr.h" #include "dcn301_smu.h" #include "reg_helper.h" @@ -526,81 +529,6 @@ static struct clk_bw_params vg_bw_params = { }; -static struct wm_table ddr4_wm_table = { - .entries = { - { - .wm_inst = WM_A, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 6.09, - .sr_enter_plus_exit_time_us = 7.14, - .valid = true, - }, - { - .wm_inst = WM_B, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 10.12, - .sr_enter_plus_exit_time_us = 11.48, - .valid = true, - }, - { - .wm_inst = WM_C, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 10.12, - .sr_enter_plus_exit_time_us = 11.48, - .valid = true, - }, - { - .wm_inst = WM_D, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 10.12, - .sr_enter_plus_exit_time_us = 11.48, - .valid = true, - }, - } -}; - -static struct wm_table lpddr5_wm_table = { - .entries = { - { - .wm_inst = WM_A, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 13.5, - .sr_enter_plus_exit_time_us = 16.5, - .valid = true, - }, - { - .wm_inst = WM_B, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 13.5, - .sr_enter_plus_exit_time_us = 16.5, 
- .valid = true, - }, - { - .wm_inst = WM_C, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 13.5, - .sr_enter_plus_exit_time_us = 16.5, - .valid = true, - }, - { - .wm_inst = WM_D, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 13.5, - .sr_enter_plus_exit_time_us = 16.5, - .valid = true, - }, - } -}; - - static unsigned int find_dcfclk_for_voltage(const struct vg_dpm_clocks *clock_table, unsigned int voltage) { @@ -670,10 +598,9 @@ static void vg_clk_mgr_helper_populate_bw_params( /* * WM set D will be re-purposed for memory retraining */ - bw_params->wm_table.entries[WM_D].pstate_latency_us = LPDDR_MEM_RETRAIN_LATENCY; - bw_params->wm_table.entries[WM_D].wm_inst = WM_D; - bw_params->wm_table.entries[WM_D].wm_type = WM_TYPE_RETRAINING; - bw_params->wm_table.entries[WM_D].valid = true; + DC_FP_START(); + dcn21_clk_mgr_set_bw_params_wm_table(bw_params); + DC_FP_END(); } } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.h index 7255477307f1..75884f572989 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.h @@ -29,6 +29,9 @@ struct watermarks; +extern struct wm_table ddr4_wm_table; +extern struct wm_table lpddr5_wm_table; + struct smu_watermark_set { struct watermarks *wm_set; union large_integer mc_address; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c index e4863f0bf0f6..7ef66e511ec8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c @@ -214,6 +214,80 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_01_soc = { .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, }; +struct wm_table ddr4_wm_table = { + .entries = { + { + .wm_inst = WM_A, + .wm_type = WM_TYPE_PSTATE_CHG, 
+ .pstate_latency_us = 11.72, + .sr_exit_time_us = 6.09, + .sr_enter_plus_exit_time_us = 7.14, + .valid = true, + }, + { + .wm_inst = WM_B, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 10.12, + .sr_enter_plus_exit_time_us = 11.48, + .valid = true, + }, + { + .wm_inst = WM_C, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 10.12, + .sr_enter_plus_exit_time_us = 11.48, + .valid = true, + }, + { + .wm_inst = WM_D, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 10.12, + .sr_enter_plus_exit_time_us = 11.48, + .valid = true, + }, + } +}; + +struct wm_table lpddr5_wm_table = { + .entries = { + { + .wm_inst = WM_A, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 13.5, + .sr_enter_plus_exit_time_us = 16.5, + .valid = true, + }, + { + .wm_inst = WM_B, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 13.5, + .sr_enter_plus_exit_time_us = 16.5, + .valid = true, + }, + { + .wm_inst = WM_C, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 13.5, + .sr_enter_plus_exit_time_us = 16.5, + .valid = true, + }, + { + .wm_inst = WM_D, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 13.5, + .sr_enter_plus_exit_time_us = 16.5, + .valid = true, + }, + } +}; + static void calculate_wm_set_for_vlevel(int vlevel, struct wm_range_table_entry *table_entry, struct dcn_watermarks *wm_set, -- cgit v1.2.3 From 77299956e8867a4cc19c5fd41b797c5152aad1dd Mon Sep 17 00:00:00 2001 From: Roman Li Date: Thu, 21 Jul 2022 17:21:12 -0400 Subject: drm/amd/display: Fix dc_version detect for dcn314 [Why] While parsing dc_version redundant check leads to invalid dc_version for dcn314. 
[How] Remove redundant check Fixes: ee7b62e127c8 ("drm/amd/display: Enable DCN314 in DC") Signed-off-by: Roman Li Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index bdaad4ce4b2d..752ba4ab2b1e 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -74,6 +74,7 @@ enum dce_version resource_parse_asic_id(struct hw_asic_id asic_id) { enum dce_version dc_version = DCE_VERSION_UNKNOWN; + switch (asic_id.chip_family) { #if defined(CONFIG_DRM_AMD_DC_SI) @@ -169,8 +170,7 @@ enum dce_version resource_parse_asic_id(struct hw_asic_id asic_id) dc_version = DCN_VERSION_3_21; break; case AMDGPU_FAMILY_GC_11_0_2: - if (ASICREV_IS_GC_11_0_2(asic_id.hw_internal_rev)) - dc_version = DCN_VERSION_3_14; + dc_version = DCN_VERSION_3_14; break; default: dc_version = DCE_VERSION_UNKNOWN; -- cgit v1.2.3 From 1c05d9e53d2cfad1f99d9087a8ad24da4ebc0905 Mon Sep 17 00:00:00 2001 From: Roman Li Date: Thu, 21 Jul 2022 17:52:23 -0400 Subject: drm/amd/display: Specify supported modifiers for dcn314 Use same modifiers as for AMDGPU_FAMILY_GC_11_0_0 Signed-off-by: Roman Li Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index 4702a53a0bf9..8cd25b2ea0dc 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -1412,6 +1412,7 @@ static bool dm_plane_format_mod_supported(struct drm_plane *plane, } break; case AMDGPU_FAMILY_GC_11_0_0: + case AMDGPU_FAMILY_GC_11_0_2: switch (AMD_FMT_MOD_GET(TILE, modifier)) { 
case AMD_FMT_MOD_TILE_GFX11_256K_R_X: case AMD_FMT_MOD_TILE_GFX9_64K_R_X: -- cgit v1.2.3 From 0593ad215359d51514c1e6c81ce28ea598efed6b Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Wed, 20 Jul 2022 18:00:45 -0400 Subject: drm/amdkfd: Correct mmu_notifier_get failure handling If the process has a signal pending, mmu_notifier_get_locked fails and calls ops->free_notifier; kfd_process_free_notifier then schedules kfd_process_wq_release because the process refcount is 1, but the process structure is already freed. This use-after-free bug causes system crashes with varying backtraces. The fix is to increase the process refcount before mmu_notifier_get and decrease it again after mmu_notifier_get succeeds. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index fc38a4d81420..d8591721270b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1405,6 +1405,11 @@ static struct kfd_process *create_process(const struct task_struct *thread) hash_add_rcu(kfd_processes_table, &process->kfd_processes, (uintptr_t)process->mm); + /* Avoid free_notifier to start kfd_process_wq_release if + * mmu_notifier_get failed because of pending signal. + */ + kref_get(&process->ref); + /* MMU notifier registration must be the last call that can fail * because after this point we cannot unwind the process creation.
* After this point, mmu_notifier_put will trigger the cleanup by @@ -1417,6 +1422,7 @@ static struct kfd_process *create_process(const struct task_struct *thread) } BUG_ON(mn != &process->mmu_notifier); + kfd_unref_process(process); get_task_struct(process->lead_thread); return process; -- cgit v1.2.3 From 74097f9fd2f5ebdae04fcba59da345386415cbf3 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Thu, 7 Jul 2022 16:54:19 -0400 Subject: drm/amdkfd: Process notifier release callback don't take mutex Move process queues cleanup to deferred work kfd_process_wq_release, to avoid potential deadlock circular locking warning: WARNING: possible circular locking dependency detected the existing dependency chain (in reverse order) is: -> #2 ((work_completion)(&svms->deferred_list_work)){+.+.}-{0:0}: __flush_work+0x343/0x4a0 svm_range_list_lock_and_flush_work+0x39/0xc0 svm_range_set_attr+0xe8/0x1080 [amdgpu] kfd_ioctl+0x19b/0x600 [amdgpu] __x64_sys_ioctl+0x81/0xb0 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae -> #1 (&info->lock#2){+.+.}-{3:3}: __mutex_lock+0xa4/0x940 amdgpu_amdkfd_gpuvm_acquire_process_vm+0x2e3/0x590 kfd_process_device_init_vm+0x61/0x200 [amdgpu] kfd_ioctl_acquire_vm+0x83/0xb0 [amdgpu] kfd_ioctl+0x19b/0x600 [amdgpu] __x64_sys_ioctl+0x81/0xb0 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae -> #0 (&process->mutex){+.+.}-{3:3}: __lock_acquire+0x1365/0x23d0 lock_acquire+0xc9/0x2e0 __mutex_lock+0xa4/0x940 kfd_process_notifier_release+0x96/0xe0 [amdgpu] __mmu_notifier_release+0x94/0x210 exit_mmap+0x35/0x1f0 mmput+0x63/0x120 svm_range_deferred_list_work+0x177/0x2c0 [amdgpu] process_one_work+0x2a4/0x600 worker_thread+0x39/0x3e0 kthread+0x16d/0x1a0 Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock((work_completion)(&svms->deferred_list_work)); lock(&info->lock#2); lock((work_completion)(&svms->deferred_list_work)); lock(&process->mutex); Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- 
drivers/gpu/drm/amd/amdkfd/kfd_process.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index d8591721270b..6c83a519b3a1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1115,6 +1115,15 @@ static void kfd_process_wq_release(struct work_struct *work) struct kfd_process *p = container_of(work, struct kfd_process, release_work); + kfd_process_dequeue_from_all_devices(p); + pqm_uninit(&p->pqm); + + /* Signal the eviction fence after user mode queues are + * destroyed. This allows any BOs to be freed without + * triggering pointless evictions or waiting for fences. + */ + dma_fence_signal(p->ef); + kfd_process_remove_sysfs(p); kfd_iommu_unbind_process(p); @@ -1179,20 +1188,8 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, cancel_delayed_work_sync(&p->eviction_work); cancel_delayed_work_sync(&p->restore_work); - mutex_lock(&p->mutex); - - kfd_process_dequeue_from_all_devices(p); - pqm_uninit(&p->pqm); - /* Indicate to other users that MM is no longer valid */ p->mm = NULL; - /* Signal the eviction fence after user mode queues are - * destroyed. This allows any BOs to be freed without - * triggering pointless evictions or waiting for fences. - */ - dma_fence_signal(p->ef); - - mutex_unlock(&p->mutex); mmu_notifier_put(&p->mmu_notifier); } -- cgit v1.2.3 From 7acc487ab57e076c823b2b7559aa9e3997962ca2 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 20 Jul 2022 16:00:56 -0400 Subject: drm/amd/display: reduce stack size in dcn32 dml (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move additional dummy structures off the stack and into the dummy vars structure. 
Fixes the following: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c: In function 'DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation': drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c:1659:1: error: the frame size of 2144 bytes is larger than 2048 bytes [-Werror=frame-larger-than=] 1659 | } | ^ drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c: In function 'dml32_ModeSupportAndSystemConfigurationFull': drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c:3799:1: error: the frame size of 2464 bytes is larger than 2048 bytes [-Werror=frame-larger-than=] 3799 | } // ModeSupportAndSystemConfigurationFull | ^ v2: move more stuff to dummy structure, fix init order (Alex) Acked-by: Christian König Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher Cc: Stephen Rothwell --- .../amd/display/dc/dml/dcn32/display_mode_vba_32.c | 411 +++++++++------------ .../gpu/drm/amd/display/dc/dml/display_mode_vba.h | 36 ++ 2 files changed, 217 insertions(+), 230 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index e9204c711cb9..9c2003fbe8fa 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -65,6 +65,12 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman int iteration; double MaxTotalRDBandwidth; unsigned int NextPrefetchMode; + double MaxTotalRDBandwidthNoUrgentBurst = 0.0; + bool DestinationLineTimesForPrefetchLessThan2 = false; + bool VRatioPrefetchMoreThanMax = false; + double TWait; + double TotalWRBandwidth = 0; + double WRBandwidth = 0; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: --- START ---\n", __func__); @@ -710,11 +716,6 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman 
NextPrefetchMode = mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb]; do { - double MaxTotalRDBandwidthNoUrgentBurst = 0.0; - bool DestinationLineTimesForPrefetchLessThan2 = false; - bool VRatioPrefetchMoreThanMax = false; - double dummy_unit_vector[DC__NUM_DPP__MAX]; - MaxTotalRDBandwidth = 0; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, mode_lib->vba.VStartupLines); @@ -723,41 +724,39 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman /* NOTE PerfetchMode variable is invalid in DAL as per the input received. * Hence the direction is to use PrefetchModePerState. */ - double TWait = dml32_CalculateTWait( - mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb], - mode_lib->vba.UsesMALLForPStateChange[k], - mode_lib->vba.SynchronizeDRRDisplaysForUCLKPStateChangeFinal, - mode_lib->vba.DRRDisplay[k], - mode_lib->vba.DRAMClockChangeLatency, - mode_lib->vba.FCLKChangeLatency, v->UrgentLatency, - mode_lib->vba.SREnterPlusExitTime); - - DmlPipe myPipe; - - myPipe.Dppclk = mode_lib->vba.DPPCLK[k]; - myPipe.Dispclk = mode_lib->vba.DISPCLK; - myPipe.PixelClock = mode_lib->vba.PixelClock[k]; - myPipe.DCFClkDeepSleep = v->DCFCLKDeepSleep; - myPipe.DPPPerSurface = mode_lib->vba.DPPPerPlane[k]; - myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k]; - myPipe.SourceRotation = mode_lib->vba.SourceRotation[k]; - myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k]; - myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k]; - myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k]; - myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k]; - myPipe.InterlaceEnable = mode_lib->vba.Interlace[k]; - myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k]; - myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]; - myPipe.HTotal = mode_lib->vba.HTotal[k]; - myPipe.HActive = mode_lib->vba.HActive[k]; - myPipe.DCCEnable = 
mode_lib->vba.DCCEnable[k]; - myPipe.ODMMode = mode_lib->vba.ODMCombineEnabled[k]; - myPipe.SourcePixelFormat = mode_lib->vba.SourcePixelFormat[k]; - myPipe.BytePerPixelY = v->BytePerPixelY[k]; - myPipe.BytePerPixelC = v->BytePerPixelC[k]; - myPipe.ProgressiveToInterlaceUnitInOPP = mode_lib->vba.ProgressiveToInterlaceUnitInOPP; + TWait = dml32_CalculateTWait( + mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb], + mode_lib->vba.UsesMALLForPStateChange[k], + mode_lib->vba.SynchronizeDRRDisplaysForUCLKPStateChangeFinal, + mode_lib->vba.DRRDisplay[k], + mode_lib->vba.DRAMClockChangeLatency, + mode_lib->vba.FCLKChangeLatency, v->UrgentLatency, + mode_lib->vba.SREnterPlusExitTime); + + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.Dppclk = mode_lib->vba.DPPCLK[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.Dispclk = mode_lib->vba.DISPCLK; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.PixelClock = mode_lib->vba.PixelClock[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.DCFClkDeepSleep = v->DCFCLKDeepSleep; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.DPPPerSurface = mode_lib->vba.DPPPerPlane[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.SourceRotation = mode_lib->vba.SourceRotation[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k]; + 
v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.InterlaceEnable = mode_lib->vba.Interlace[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.HTotal = mode_lib->vba.HTotal[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.HActive = mode_lib->vba.HActive[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.DCCEnable = mode_lib->vba.DCCEnable[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.ODMMode = mode_lib->vba.ODMCombineEnabled[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.SourcePixelFormat = mode_lib->vba.SourcePixelFormat[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BytePerPixelY = v->BytePerPixelY[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BytePerPixelC = v->BytePerPixelC[k]; + 
v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.ProgressiveToInterlaceUnitInOPP = mode_lib->vba.ProgressiveToInterlaceUnitInOPP; v->ErrorResult[k] = dml32_CalculatePrefetchSchedule(v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor, - &myPipe, v->DSCDelay[k], + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe, v->DSCDelay[k], mode_lib->vba.DPPCLKDelaySubtotal + mode_lib->vba.DPPCLKDelayCNVCFormater, mode_lib->vba.DPPCLKDelaySCL, mode_lib->vba.DPPCLKDelaySCLLBOnly, @@ -906,8 +905,6 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman #endif { - double dummy_single[1]; - dml32_CalculatePrefetchBandwithSupport( mode_lib->vba.NumberOfActiveSurfaces, mode_lib->vba.ReturnBW, @@ -931,16 +928,14 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman /* output */ &MaxTotalRDBandwidth, - &dummy_single[0], + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[0], &v->PrefetchModeSupported); } for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) - dummy_unit_vector[k] = 1.0; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector[k] = 1.0; { - double dummy_single[1]; - bool dummy_boolean[1]; dml32_CalculatePrefetchBandwithSupport(mode_lib->vba.NumberOfActiveSurfaces, mode_lib->vba.ReturnBW, v->NoUrgentLatencyHidingPre, @@ -954,17 +949,17 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->cursor_bw_pre, v->prefetch_vmrow_bw, mode_lib->vba.DPPPerPlane, - dummy_unit_vector, - dummy_unit_vector, - dummy_unit_vector, - dummy_unit_vector, - dummy_unit_vector, - dummy_unit_vector, + 
v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, /* output */ - &dummy_single[0], + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[0], &v->FractionOfUrgentBandwidth, - &dummy_boolean[0]); + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_boolean); } if (VRatioPrefetchMoreThanMax != false || DestinationLineTimesForPrefetchLessThan2 != false) { @@ -1047,8 +1042,6 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman } { - double dummy_single[2]; - bool dummy_boolean[1]; dml32_CalculateImmediateFlipBandwithSupport(mode_lib->vba.NumberOfActiveSurfaces, mode_lib->vba.ReturnBW, mode_lib->vba.ImmediateFlipRequirement, @@ -1072,7 +1065,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman /* output */ &v->total_dcn_read_bw_with_flip, // Single *TotalBandwidth - &dummy_single[0], // Single *FractionOfUrgentBandwidth + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[0], // Single *FractionOfUrgentBandwidth &v->ImmediateFlipSupported); // Boolean *ImmediateFlipBandwidthSupport dml32_CalculateImmediateFlipBandwithSupport(mode_lib->vba.NumberOfActiveSurfaces, @@ -1089,17 +1082,17 @@ static void 
DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->cursor_bw_pre, v->prefetch_vmrow_bw, mode_lib->vba.DPPPerPlane, - dummy_unit_vector, - dummy_unit_vector, - dummy_unit_vector, - dummy_unit_vector, - dummy_unit_vector, - dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, /* output */ - &dummy_single[1], // Single *TotalBandwidth + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[1], // Single *TotalBandwidth &v->FractionOfUrgentBandwidthImmediateFlip, // Single *FractionOfUrgentBandwidth - &dummy_boolean[0]); // Boolean *ImmediateFlipBandwidthSupport + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_boolean); // Boolean *ImmediateFlipBandwidthSupport } for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { @@ -1157,22 +1150,17 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman //Watermarks and NB P-State/DRAM Clock Change Support { - SOCParametersList mmSOCParameters; - enum clock_change_support dummy_dramchange_support; - enum dm_fclock_change_support dummy_fclkchange_support; - bool dummy_USRRetrainingSupport; - - mmSOCParameters.UrgentLatency = v->UrgentLatency; - mmSOCParameters.ExtraLatency = v->UrgentExtraLatency; - 
mmSOCParameters.WritebackLatency = mode_lib->vba.WritebackLatency; - mmSOCParameters.DRAMClockChangeLatency = mode_lib->vba.DRAMClockChangeLatency; - mmSOCParameters.FCLKChangeLatency = mode_lib->vba.FCLKChangeLatency; - mmSOCParameters.SRExitTime = mode_lib->vba.SRExitTime; - mmSOCParameters.SREnterPlusExitTime = mode_lib->vba.SREnterPlusExitTime; - mmSOCParameters.SRExitZ8Time = mode_lib->vba.SRExitZ8Time; - mmSOCParameters.SREnterPlusExitZ8Time = mode_lib->vba.SREnterPlusExitZ8Time; - mmSOCParameters.USRRetrainingLatency = mode_lib->vba.USRRetrainingLatency; - mmSOCParameters.SMNLatency = mode_lib->vba.SMNLatency; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.UrgentLatency = v->UrgentLatency; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.ExtraLatency = v->UrgentExtraLatency; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.WritebackLatency = mode_lib->vba.WritebackLatency; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.DRAMClockChangeLatency = mode_lib->vba.DRAMClockChangeLatency; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.FCLKChangeLatency = mode_lib->vba.FCLKChangeLatency; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SRExitTime = mode_lib->vba.SRExitTime; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SREnterPlusExitTime = mode_lib->vba.SREnterPlusExitTime; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SRExitZ8Time = mode_lib->vba.SRExitZ8Time; + 
v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SREnterPlusExitZ8Time = mode_lib->vba.SREnterPlusExitZ8Time; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.USRRetrainingLatency = mode_lib->vba.USRRetrainingLatency; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SMNLatency = mode_lib->vba.SMNLatency; dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( mode_lib->vba.USRRetrainingRequiredFinal, @@ -1190,7 +1178,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->dpte_group_bytes, v->meta_row_height, v->meta_row_height_chroma, - mmSOCParameters, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters, mode_lib->vba.WritebackChunkSize, mode_lib->vba.SOCCLK, v->DCFCLKDeepSleep, @@ -1227,12 +1215,12 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman /* Output */ &v->Watermark, - &dummy_dramchange_support, + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_dramchange_support, v->MaxActiveDRAMClockChangeLatencySupported, v->SubViewportLinesNeededInMALL, - &dummy_fclkchange_support, + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_fclkchange_support, &v->MinActiveFCLKChangeLatencySupported, - &dummy_USRRetrainingSupport, + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_USRRetrainingSupport, mode_lib->vba.ActiveDRAMClockChangeLatencyMargin); /* DCN32 has a new struct Watermarks (typedef) which is used to store @@ -1494,9 +1482,6 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman { //Maximum Bandwidth Used - double TotalWRBandwidth = 0; - double 
WRBandwidth = 0; - for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { if (mode_lib->vba.WritebackEnable[k] == true && mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) { @@ -1590,9 +1575,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman #ifdef __DML_VBA_ALLOW_DELTA__ { - double dummy_single[2]; unsigned int dummy_integer[1]; - bool dummy_boolean[1]; // Calculate z8 stutter eff assuming 0 reserved space dml32_CalculateStutterEfficiency(v->CompressedBufferSizeInkByte, @@ -1645,14 +1628,14 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->meta_row_bw, v->dpte_row_bw, /* Output */ - &dummy_single[0], - &dummy_single[1], + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[0], + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[1], &dummy_integer[0], &v->Z8StutterEfficiencyNotIncludingVBlankBestCase, &v->Z8StutterEfficiencyBestCase, &v->Z8NumberOfStutterBurstsPerFrameBestCase, &v->StutterPeriodBestCase, - &dummy_boolean[0]); + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_boolean); } #else v->Z8StutterEfficiencyNotIncludingVBlankBestCase = v->Z8StutterEfficiencyNotIncludingVBlank; @@ -1668,32 +1651,18 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) { - unsigned int dummy_integer[4]; - bool dummy_boolean[2]; - bool MPCCombineMethodAsNeededForPStateChangeAndVoltage; - bool MPCCombineMethodAsPossible; - enum odm_combine_mode dummy_odm_mode[DC__NUM_DPP__MAX]; - unsigned int TotalNumberOfActiveOTG; - unsigned int TotalNumberOfActiveHDMIFRL; - unsigned int TotalNumberOfActiveDP2p0; - unsigned int TotalNumberOfActiveDP2p0Outputs; - unsigned int TotalDSCUnitsRequired; - unsigned int m; - unsigned int 
ReorderingBytes; - bool FullFrameMALLPStateMethod; - bool SubViewportMALLPStateMethod; - bool PhantomPipeMALLPStateMethod; + struct vba_vars_st *v = &mode_lib->vba; + int i, j; + unsigned int k, m; unsigned int MaximumMPCCombine; + unsigned int NumberOfNonCombinedSurfaceOfMaximumBandwidth; + unsigned int TotalSlots; bool CompBufReservedSpaceNeedAdjustment; bool CompBufReservedSpaceNeedAdjustmentSingleDPP; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: called\n", __func__); #endif - struct vba_vars_st *v = &mode_lib->vba; - - int i, j; - unsigned int k; /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/ @@ -1945,7 +1914,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.Read256BlockHeightC, mode_lib->vba.Read256BlockWidthY, mode_lib->vba.Read256BlockWidthC, - dummy_odm_mode, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_odm_mode, mode_lib->vba.BlendingAndTiming, mode_lib->vba.BytePerPixelY, mode_lib->vba.BytePerPixelC, @@ -1973,35 +1942,26 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.SingleDPPViewportSizeSupportPerSurface,/* bool ViewportSizeSupportPerSurface[] */ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean_array[1][0]); /* bool *ViewportSizeSupport */ - MPCCombineMethodAsNeededForPStateChangeAndVoltage = false; - MPCCombineMethodAsPossible = false; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsNeededForPStateChangeAndVoltage = false; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsPossible = false; for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { if (mode_lib->vba.MPCCombineUse[k] == dm_mpc_reduce_voltage_and_clocks) - MPCCombineMethodAsNeededForPStateChangeAndVoltage = true; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsNeededForPStateChangeAndVoltage = true; if (mode_lib->vba.MPCCombineUse[k] == 
dm_mpc_always_when_possible) - MPCCombineMethodAsPossible = true; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsPossible = true; } - mode_lib->vba.MPCCombineMethodIncompatible = MPCCombineMethodAsNeededForPStateChangeAndVoltage - && MPCCombineMethodAsPossible; + mode_lib->vba.MPCCombineMethodIncompatible = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsNeededForPStateChangeAndVoltage + && v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsPossible; for (i = 0; i < v->soc.num_states; i++) { for (j = 0; j < 2; j++) { - bool NoChroma; mode_lib->vba.TotalNumberOfActiveDPP[i][j] = 0; mode_lib->vba.TotalAvailablePipesSupport[i][j] = true; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeNoDSC = dm_odm_combine_mode_disabled; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeDSC = dm_odm_combine_mode_disabled; for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { - - bool TotalAvailablePipesSupportNoDSC; - unsigned int NumberOfDPPNoDSC; - enum odm_combine_mode ODMModeNoDSC = dm_odm_combine_mode_disabled; - double RequiredDISPCLKPerSurfaceNoDSC; - bool TotalAvailablePipesSupportDSC; - unsigned int NumberOfDPPDSC; - enum odm_combine_mode ODMModeDSC = dm_odm_combine_mode_disabled; - double RequiredDISPCLKPerSurfaceDSC; - dml32_CalculateODMMode( mode_lib->vba.MaximumPixelsPerLinePerDSCUnit, mode_lib->vba.HActive[k], @@ -2018,10 +1978,10 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.DISPCLKDPPCLKVCOSpeed, /* Output */ - &TotalAvailablePipesSupportNoDSC, - &NumberOfDPPNoDSC, - &ODMModeNoDSC, - &RequiredDISPCLKPerSurfaceNoDSC); + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalAvailablePipesSupportNoDSC, + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NumberOfDPPNoDSC, + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeNoDSC, + 
&v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.RequiredDISPCLKPerSurfaceNoDSC); dml32_CalculateODMMode( mode_lib->vba.MaximumPixelsPerLinePerDSCUnit, @@ -2039,10 +1999,10 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.DISPCLKDPPCLKVCOSpeed, /* Output */ - &TotalAvailablePipesSupportDSC, - &NumberOfDPPDSC, - &ODMModeDSC, - &RequiredDISPCLKPerSurfaceDSC); + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalAvailablePipesSupportDSC, + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NumberOfDPPDSC, + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeDSC, + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.RequiredDISPCLKPerSurfaceDSC); dml32_CalculateOutputLink( mode_lib->vba.PHYCLKPerState[i], @@ -2060,8 +2020,8 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.NumberOfDSCSlices[k], mode_lib->vba.AudioSampleRate[k], mode_lib->vba.AudioSampleLayout[k], - ODMModeNoDSC, - ODMModeDSC, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeNoDSC, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeDSC, mode_lib->vba.DSCEnable[k], mode_lib->vba.OutputLinkDPLanes[k], mode_lib->vba.OutputLinkDPRate[k], @@ -2075,21 +2035,21 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l &mode_lib->vba.RequiredSlots[i][k]); if (mode_lib->vba.RequiresDSC[i][k] == false) { - mode_lib->vba.ODMCombineEnablePerState[i][k] = ODMModeNoDSC; + mode_lib->vba.ODMCombineEnablePerState[i][k] = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeNoDSC; mode_lib->vba.RequiredDISPCLKPerSurface[i][j][k] = - RequiredDISPCLKPerSurfaceNoDSC; - if (!TotalAvailablePipesSupportNoDSC) + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.RequiredDISPCLKPerSurfaceNoDSC; + if (!v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalAvailablePipesSupportNoDSC) 
mode_lib->vba.TotalAvailablePipesSupport[i][j] = false; mode_lib->vba.TotalNumberOfActiveDPP[i][j] = - mode_lib->vba.TotalNumberOfActiveDPP[i][j] + NumberOfDPPNoDSC; + mode_lib->vba.TotalNumberOfActiveDPP[i][j] + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NumberOfDPPNoDSC; } else { - mode_lib->vba.ODMCombineEnablePerState[i][k] = ODMModeDSC; + mode_lib->vba.ODMCombineEnablePerState[i][k] = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeDSC; mode_lib->vba.RequiredDISPCLKPerSurface[i][j][k] = - RequiredDISPCLKPerSurfaceDSC; - if (!TotalAvailablePipesSupportDSC) + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.RequiredDISPCLKPerSurfaceDSC; + if (!v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalAvailablePipesSupportDSC) mode_lib->vba.TotalAvailablePipesSupport[i][j] = false; mode_lib->vba.TotalNumberOfActiveDPP[i][j] = - mode_lib->vba.TotalNumberOfActiveDPP[i][j] + NumberOfDPPDSC; + mode_lib->vba.TotalNumberOfActiveDPP[i][j] + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NumberOfDPPDSC; } } @@ -2124,7 +2084,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } mode_lib->vba.TotalNumberOfSingleDPPSurfaces[i][j] = 0; - NoChroma = true; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NoChroma = true; for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { if (mode_lib->vba.NoOfDPP[i][j][k] == 1) @@ -2134,7 +2094,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l || mode_lib->vba.SourcePixelFormat[k] == dm_420_10 || mode_lib->vba.SourcePixelFormat[k] == dm_420_12 || mode_lib->vba.SourcePixelFormat[k] == dm_rgbe_alpha) { - NoChroma = false; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NoChroma = false; } } @@ -2145,15 +2105,15 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l if (j == 1 && !dml32_UnboundedRequest(mode_lib->vba.UseUnboundedRequesting, - 
mode_lib->vba.TotalNumberOfActiveDPP[i][j], NoChroma, + mode_lib->vba.TotalNumberOfActiveDPP[i][j], v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NoChroma, mode_lib->vba.Output[0], mode_lib->vba.SurfaceTiling[0], CompBufReservedSpaceNeedAdjustment, mode_lib->vba.DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)) { while (!(mode_lib->vba.TotalNumberOfActiveDPP[i][j] >= mode_lib->vba.MaxNumDPP || mode_lib->vba.TotalNumberOfSingleDPPSurfaces[i][j] == 0)) { - double BWOfNonCombinedSurfaceOfMaximumBandwidth = 0; - unsigned int NumberOfNonCombinedSurfaceOfMaximumBandwidth = 0; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.BWOfNonCombinedSurfaceOfMaximumBandwidth = 0; + NumberOfNonCombinedSurfaceOfMaximumBandwidth = 0; for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { if (mode_lib->vba.MPCCombineUse[k] @@ -2161,13 +2121,13 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.MPCCombineUse[k] != dm_mpc_reduce_voltage && mode_lib->vba.ReadBandwidthLuma[k] + mode_lib->vba.ReadBandwidthChroma[k] > - BWOfNonCombinedSurfaceOfMaximumBandwidth && + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.BWOfNonCombinedSurfaceOfMaximumBandwidth && (mode_lib->vba.ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_2to1 && mode_lib->vba.ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) && mode_lib->vba.MPCCombine[i][j][k] == false) { - BWOfNonCombinedSurfaceOfMaximumBandwidth = + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.BWOfNonCombinedSurfaceOfMaximumBandwidth = mode_lib->vba.ReadBandwidthLuma[k] + mode_lib->vba.ReadBandwidthChroma[k]; NumberOfNonCombinedSurfaceOfMaximumBandwidth = k; @@ -2233,28 +2193,28 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } // i (VOLTAGE_STATE) /* Total Available OTG, HDMIFRL, DP Support Check */ - TotalNumberOfActiveOTG = 0; - TotalNumberOfActiveHDMIFRL = 0; - TotalNumberOfActiveDP2p0 = 0; - 
TotalNumberOfActiveDP2p0Outputs = 0; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveOTG = 0; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveHDMIFRL = 0; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0 = 0; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0Outputs = 0; for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { if (mode_lib->vba.BlendingAndTiming[k] == k) { - TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveOTG = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveOTG + 1; if (mode_lib->vba.Output[k] == dm_dp2p0) { - TotalNumberOfActiveDP2p0 = TotalNumberOfActiveDP2p0 + 1; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0 = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0 + 1; if (mode_lib->vba.OutputMultistreamId[k] == k || mode_lib->vba.OutputMultistreamEn[k] == false) { - TotalNumberOfActiveDP2p0Outputs = TotalNumberOfActiveDP2p0Outputs + 1; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0Outputs = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0Outputs + 1; } } } } - mode_lib->vba.NumberOfOTGSupport = (TotalNumberOfActiveOTG <= mode_lib->vba.MaxNumOTG); - mode_lib->vba.NumberOfHDMIFRLSupport = (TotalNumberOfActiveHDMIFRL <= mode_lib->vba.MaxNumHDMIFRLOutputs); - mode_lib->vba.NumberOfDP2p0Support = (TotalNumberOfActiveDP2p0 <= mode_lib->vba.MaxNumDP2p0Streams - && TotalNumberOfActiveDP2p0Outputs <= mode_lib->vba.MaxNumDP2p0Outputs); + mode_lib->vba.NumberOfOTGSupport = (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveOTG <= mode_lib->vba.MaxNumOTG); + mode_lib->vba.NumberOfHDMIFRLSupport = 
(v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveHDMIFRL <= mode_lib->vba.MaxNumHDMIFRLOutputs); + mode_lib->vba.NumberOfDP2p0Support = (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0 <= mode_lib->vba.MaxNumDP2p0Streams + && v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0Outputs <= mode_lib->vba.MaxNumDP2p0Outputs); /* Display IO and DSC Support Check */ mode_lib->vba.NonsupportedDSCInputBPC = false; @@ -2269,8 +2229,6 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } for (i = 0; i < v->soc.num_states; ++i) { - unsigned int TotalSlots; - mode_lib->vba.ExceededMultistreamSlots[i] = false; for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { if (mode_lib->vba.OutputMultistreamEn[k] == true && mode_lib->vba.OutputMultistreamId[k] == k) { @@ -2441,12 +2399,12 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } /* Check DSC Unit and Slices Support */ - TotalDSCUnitsRequired = 0; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = 0; for (i = 0; i < v->soc.num_states; ++i) { mode_lib->vba.NotEnoughDSCUnits[i] = false; mode_lib->vba.NotEnoughDSCSlices[i] = false; - TotalDSCUnitsRequired = 0; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = 0; mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] = true; for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { if (mode_lib->vba.RequiresDSC[i][k] == true) { @@ -2454,33 +2412,31 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l if (mode_lib->vba.HActive[k] > 4 * mode_lib->vba.MaximumPixelsPerLinePerDSCUnit) mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] = false; - TotalDSCUnitsRequired = TotalDSCUnitsRequired + 4; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = 
v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired + 4; if (mode_lib->vba.NumberOfDSCSlices[k] > 16) mode_lib->vba.NotEnoughDSCSlices[i] = true; } else if (mode_lib->vba.ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { if (mode_lib->vba.HActive[k] > 2 * mode_lib->vba.MaximumPixelsPerLinePerDSCUnit) mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] = false; - TotalDSCUnitsRequired = TotalDSCUnitsRequired + 2; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired + 2; if (mode_lib->vba.NumberOfDSCSlices[k] > 8) mode_lib->vba.NotEnoughDSCSlices[i] = true; } else { if (mode_lib->vba.HActive[k] > mode_lib->vba.MaximumPixelsPerLinePerDSCUnit) mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] = false; - TotalDSCUnitsRequired = TotalDSCUnitsRequired + 1; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired + 1; if (mode_lib->vba.NumberOfDSCSlices[k] > 4) mode_lib->vba.NotEnoughDSCSlices[i] = true; } } } - if (TotalDSCUnitsRequired > mode_lib->vba.NumberOfDSC) + if (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired > mode_lib->vba.NumberOfDSC) mode_lib->vba.NotEnoughDSCUnits[i] = true; } /*DSC Delay per state*/ for (i = 0; i < v->soc.num_states; ++i) { - unsigned int m; - for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { mode_lib->vba.DSCDelayPerState[i][k] = dml32_DSCDelayRequirement( mode_lib->vba.RequiresDSC[i][k], mode_lib->vba.ODMCombineEnablePerState[i][k], @@ -2576,8 +2532,8 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.DETBufferSizeCThisState, &mode_lib->vba.UnboundedRequestEnabledThisState, &mode_lib->vba.CompressedBufferSizeInkByteThisState, - &dummy_integer[0], /* Long CompBufReservedSpaceKBytes */ - 
&dummy_boolean[0], /* bool CompBufReservedSpaceNeedAdjustment */ + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer[0], /* Long CompBufReservedSpaceKBytes */ + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean[0], /* bool CompBufReservedSpaceNeedAdjustment */ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean_array[0], &mode_lib->vba.ViewportSizeSupport[i][j]); @@ -2942,7 +2898,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } } - ReorderingBytes = mode_lib->vba.NumberOfChannels + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ReorderingBytes = mode_lib->vba.NumberOfChannels * dml_max3(mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly, mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, mode_lib->vba.UrgentOutOfOrderReturnPerChannelVMDataOnly); @@ -2998,20 +2954,20 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l && (mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame)); } - FullFrameMALLPStateMethod = false; - SubViewportMALLPStateMethod = false; - PhantomPipeMALLPStateMethod = false; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.FullFrameMALLPStateMethod = false; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SubViewportMALLPStateMethod = false; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.PhantomPipeMALLPStateMethod = false; for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { if (mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame) - FullFrameMALLPStateMethod = true; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.FullFrameMALLPStateMethod = true; if (mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) - SubViewportMALLPStateMethod = true; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SubViewportMALLPStateMethod = true; if 
(mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) - PhantomPipeMALLPStateMethod = true; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.PhantomPipeMALLPStateMethod = true; } - mode_lib->vba.InvalidCombinationOfMALLUseForPState = (SubViewportMALLPStateMethod - != PhantomPipeMALLPStateMethod) || (SubViewportMALLPStateMethod && FullFrameMALLPStateMethod); + mode_lib->vba.InvalidCombinationOfMALLUseForPState = (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SubViewportMALLPStateMethod + != v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.PhantomPipeMALLPStateMethod) || (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SubViewportMALLPStateMethod && v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.FullFrameMALLPStateMethod); if (mode_lib->vba.UseMinimumRequiredDCFCLK == true) { dml32_UseMinimumDCFCLK( @@ -3025,7 +2981,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.SREnterPlusExitTime, mode_lib->vba.ReturnBusWidth, mode_lib->vba.RoundTripPingLatencyCycles, - ReorderingBytes, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ReorderingBytes, mode_lib->vba.PixelChunkSizeInKByte, mode_lib->vba.MetaChunkSize, mode_lib->vba.GPUVMEnable, @@ -3088,7 +3044,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l / mode_lib->vba.ReturnBWPerState[i][j] > (mode_lib->vba.RoundTripPingLatencyCycles + 32) / mode_lib->vba.DCFCLKState[i][j] - + ReorderingBytes / mode_lib->vba.ReturnBWPerState[i][j]) { + + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ReorderingBytes / mode_lib->vba.ReturnBWPerState[i][j]) { mode_lib->vba.ROBSupport[i][j] = true; } else { mode_lib->vba.ROBSupport[i][j] = false; @@ -3130,9 +3086,6 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l for (i = 0; i < (int) v->soc.num_states; ++i) { for (j = 0; j <= 1; ++j) { - double 
VMDataOnlyReturnBWPerState; - double HostVMInefficiencyFactor; - unsigned int NextPrefetchModeState; mode_lib->vba.TimeCalc = 24 / mode_lib->vba.ProjectedDCFCLKDeepSleep[i][j]; @@ -3172,37 +3125,35 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.UrgentBurstFactorChroma, mode_lib->vba.UrgentBurstFactorCursor); - VMDataOnlyReturnBWPerState = dml32_get_return_bw_mbps_vm_only(&mode_lib->vba.soc, i, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.VMDataOnlyReturnBWPerState = dml32_get_return_bw_mbps_vm_only(&mode_lib->vba.soc, i, mode_lib->vba.DCFCLKState[i][j], mode_lib->vba.FabricClockPerState[i], mode_lib->vba.DRAMSpeedPerState[i]); - HostVMInefficiencyFactor = 1; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor = 1; if (mode_lib->vba.GPUVMEnable && mode_lib->vba.HostVMEnable) - HostVMInefficiencyFactor = mode_lib->vba.ReturnBWPerState[i][j] - / VMDataOnlyReturnBWPerState; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor = mode_lib->vba.ReturnBWPerState[i][j] + / v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.VMDataOnlyReturnBWPerState; mode_lib->vba.ExtraLatency = dml32_CalculateExtraLatency( - mode_lib->vba.RoundTripPingLatencyCycles, ReorderingBytes, + mode_lib->vba.RoundTripPingLatencyCycles, v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ReorderingBytes, mode_lib->vba.DCFCLKState[i][j], mode_lib->vba.TotalNumberOfActiveDPP[i][j], mode_lib->vba.PixelChunkSizeInKByte, mode_lib->vba.TotalNumberOfDCCActiveDPP[i][j], mode_lib->vba.MetaChunkSize, mode_lib->vba.ReturnBWPerState[i][j], mode_lib->vba.GPUVMEnable, mode_lib->vba.HostVMEnable, mode_lib->vba.NumberOfActiveSurfaces, mode_lib->vba.NoOfDPPThisState, mode_lib->vba.dpte_group_bytes, - HostVMInefficiencyFactor, mode_lib->vba.HostVMMinPageSize, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor, mode_lib->vba.HostVMMinPageSize, 
mode_lib->vba.HostVMMaxNonCachedPageTableLevels); - NextPrefetchModeState = mode_lib->vba.MinPrefetchMode; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState = mode_lib->vba.MinPrefetchMode; mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup[i][j]; do { - mode_lib->vba.PrefetchModePerState[i][j] = NextPrefetchModeState; + mode_lib->vba.PrefetchModePerState[i][j] = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState; mode_lib->vba.MaxVStartup = mode_lib->vba.NextMaxVStartup; for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { - DmlPipe myPipe; - mode_lib->vba.TWait = dml32_CalculateTWait( mode_lib->vba.PrefetchModePerState[i][j], mode_lib->vba.UsesMALLForPStateChange[k], @@ -3212,34 +3163,34 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.FCLKChangeLatency, mode_lib->vba.UrgLatency[i], mode_lib->vba.SREnterPlusExitTime); - myPipe.Dppclk = mode_lib->vba.RequiredDPPCLK[i][j][k]; - myPipe.Dispclk = mode_lib->vba.RequiredDISPCLK[i][j]; - myPipe.PixelClock = mode_lib->vba.PixelClock[k]; - myPipe.DCFClkDeepSleep = mode_lib->vba.ProjectedDCFCLKDeepSleep[i][j]; - myPipe.DPPPerSurface = mode_lib->vba.NoOfDPP[i][j][k]; - myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k]; - myPipe.SourceRotation = mode_lib->vba.SourceRotation[k]; - myPipe.BlockWidth256BytesY = mode_lib->vba.Read256BlockWidthY[k]; - myPipe.BlockHeight256BytesY = mode_lib->vba.Read256BlockHeightY[k]; - myPipe.BlockWidth256BytesC = mode_lib->vba.Read256BlockWidthC[k]; - myPipe.BlockHeight256BytesC = mode_lib->vba.Read256BlockHeightC[k]; - myPipe.InterlaceEnable = mode_lib->vba.Interlace[k]; - myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k]; - myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]; - myPipe.HTotal = mode_lib->vba.HTotal[k]; - myPipe.HActive = mode_lib->vba.HActive[k]; - myPipe.DCCEnable = mode_lib->vba.DCCEnable[k]; - myPipe.ODMMode = 
mode_lib->vba.ODMCombineEnablePerState[i][k]; - myPipe.SourcePixelFormat = mode_lib->vba.SourcePixelFormat[k]; - myPipe.BytePerPixelY = mode_lib->vba.BytePerPixelY[k]; - myPipe.BytePerPixelC = mode_lib->vba.BytePerPixelC[k]; - myPipe.ProgressiveToInterlaceUnitInOPP = + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.Dppclk = mode_lib->vba.RequiredDPPCLK[i][j][k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.Dispclk = mode_lib->vba.RequiredDISPCLK[i][j]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.PixelClock = mode_lib->vba.PixelClock[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.DCFClkDeepSleep = mode_lib->vba.ProjectedDCFCLKDeepSleep[i][j]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.DPPPerSurface = mode_lib->vba.NoOfDPP[i][j][k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.SourceRotation = mode_lib->vba.SourceRotation[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BlockWidth256BytesY = mode_lib->vba.Read256BlockWidthY[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BlockHeight256BytesY = mode_lib->vba.Read256BlockHeightY[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BlockWidth256BytesC = mode_lib->vba.Read256BlockWidthC[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BlockHeight256BytesC = mode_lib->vba.Read256BlockHeightC[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.InterlaceEnable = mode_lib->vba.Interlace[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]; + 
v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.HTotal = mode_lib->vba.HTotal[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.HActive = mode_lib->vba.HActive[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.DCCEnable = mode_lib->vba.DCCEnable[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.ODMMode = mode_lib->vba.ODMCombineEnablePerState[i][k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.SourcePixelFormat = mode_lib->vba.SourcePixelFormat[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BytePerPixelY = mode_lib->vba.BytePerPixelY[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BytePerPixelC = mode_lib->vba.BytePerPixelC[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.ProgressiveToInterlaceUnitInOPP = mode_lib->vba.ProgressiveToInterlaceUnitInOPP; mode_lib->vba.NoTimeForPrefetch[i][j][k] = dml32_CalculatePrefetchSchedule( - HostVMInefficiencyFactor, - &myPipe, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor, + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe, mode_lib->vba.DSCDelayPerState[i][k], mode_lib->vba.DPPCLKDelaySubtotal + mode_lib->vba.DPPCLKDelayCNVCFormater, @@ -3298,7 +3249,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[0], // double *Tdmdl_vm &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[1], // double *Tdmdl &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[2], // double *TSetup - &dummy_integer[0], // unsigned int *VUpdateOffsetPix + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer[0], // unsigned int *VUpdateOffsetPix &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[3], // unsigned int *VUpdateWidthPix 
&v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[4]); // unsigned int *VReadyOffsetPix } @@ -3427,7 +3378,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { - dml32_CalculateFlipSchedule(HostVMInefficiencyFactor, + dml32_CalculateFlipSchedule(v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor, mode_lib->vba.ExtraLatency, mode_lib->vba.UrgLatency[i], mode_lib->vba.GPUVMMaxPageTableLevels, @@ -3501,7 +3452,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l if (mode_lib->vba.MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || mode_lib->vba.AnyLinesForVMOrRowTooLarge == false) { mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup[i][j]; - NextPrefetchModeState = NextPrefetchModeState + 1; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState + 1; } else { mode_lib->vba.NextMaxVStartup = mode_lib->vba.NextMaxVStartup - 1; } @@ -3515,7 +3466,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l && !mode_lib->vba.ImmediateFlipRequiredFinal) || mode_lib->vba.ImmediateFlipSupportedForState[i][j] == true)) || (mode_lib->vba.NextMaxVStartup == mode_lib->vba.MaxMaxVStartup[i][j] - && NextPrefetchModeState > mode_lib->vba.MaxPrefetchMode))); + && v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState > mode_lib->vba.MaxPrefetchMode))); for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { mode_lib->vba.use_one_row_for_frame_this_state[k] = @@ -3591,7 +3542,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l &mode_lib->vba.Watermark, // Store the values in vba &mode_lib->vba.DRAMClockChangeSupport[i][j], &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single2[0], // 
double *MaxActiveDRAMClockChangeLatencySupported - &dummy_integer[0], // Long SubViewportLinesNeededInMALL[] + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer[0], // Long SubViewportLinesNeededInMALL[] &mode_lib->vba.FCLKChangeSupport[i][j], &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single2[1], // double *MinActiveFCLKChangeLatencySupported &mode_lib->vba.USRRetrainingSupport[i][j], diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h index 6e61b5382361..492aec634b68 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h @@ -196,6 +196,13 @@ struct DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCal unsigned int ReorderBytes; unsigned int VMDataOnlyReturnBW; double HostVMInefficiencyFactor; + DmlPipe myPipe; + SOCParametersList mmSOCParameters; + double dummy_unit_vector[DC__NUM_DPP__MAX]; + double dummy_single[2]; + enum clock_change_support dummy_dramchange_support; + enum dm_fclock_change_support dummy_fclkchange_support; + bool dummy_USRRetrainingSupport; }; struct dml32_ModeSupportAndSystemConfigurationFull { @@ -211,6 +218,35 @@ struct dml32_ModeSupportAndSystemConfigurationFull { double DSTXAfterScaler[DC__NUM_DPP__MAX]; double MaxTotalVActiveRDBandwidth; bool dummy_boolean_array[2][DC__NUM_DPP__MAX]; + enum odm_combine_mode dummy_odm_mode[DC__NUM_DPP__MAX]; + DmlPipe myPipe; + unsigned int dummy_integer[4]; + unsigned int TotalNumberOfActiveOTG; + unsigned int TotalNumberOfActiveHDMIFRL; + unsigned int TotalNumberOfActiveDP2p0; + unsigned int TotalNumberOfActiveDP2p0Outputs; + unsigned int TotalDSCUnitsRequired; + unsigned int ReorderingBytes; + unsigned int TotalSlots; + unsigned int NumberOfDPPDSC; + unsigned int NumberOfDPPNoDSC; + unsigned int NextPrefetchModeState; + bool MPCCombineMethodAsNeededForPStateChangeAndVoltage; + bool 
MPCCombineMethodAsPossible; + bool FullFrameMALLPStateMethod; + bool SubViewportMALLPStateMethod; + bool PhantomPipeMALLPStateMethod; + bool NoChroma; + bool TotalAvailablePipesSupportNoDSC; + bool TotalAvailablePipesSupportDSC; + enum odm_combine_mode ODMModeNoDSC; + enum odm_combine_mode ODMModeDSC; + double RequiredDISPCLKPerSurfaceNoDSC; + double RequiredDISPCLKPerSurfaceDSC; + double BWOfNonCombinedSurfaceOfMaximumBandwidth; + double VMDataOnlyReturnBWPerState; + double HostVMInefficiencyFactor; + bool dummy_boolean[2]; }; struct dummy_vars { -- cgit v1.2.3 From bac4b41d917a1d999308bb1e779f8c3b39c19f67 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 20 Jul 2022 23:06:49 -0400 Subject: drm/amd/display: reduce stack for dml32_CalculateSwathAndDETConfiguration Move stack variables to dummy structure. Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher Cc: Stephen Rothwell --- .../amd/display/dc/dml/dcn32/display_mode_vba_32.c | 3 + .../dc/dml/dcn32/display_mode_vba_util_32.c | 121 ++++++++++----------- .../dc/dml/dcn32/display_mode_vba_util_32.h | 2 + .../gpu/drm/amd/display/dc/dml/display_mode_vba.h | 15 +++ 4 files changed, 77 insertions(+), 64 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index 9c2003fbe8fa..c0e537731c1f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -221,6 +221,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman // VBA_DELTA // Calculate DET size, swath height dml32_CalculateSwathAndDETConfiguration( + &v->dummy_vars.dml32_CalculateSwathAndDETConfiguration, mode_lib->vba.DETSizeOverride, mode_lib->vba.UsesMALLForPStateChange, mode_lib->vba.ConfigReturnBufferSizeInKByte, @@ -1878,6 +1879,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } 
dml32_CalculateSwathAndDETConfiguration( + &v->dummy_vars.dml32_CalculateSwathAndDETConfiguration, mode_lib->vba.DETSizeOverride, mode_lib->vba.UsesMALLForPStateChange, mode_lib->vba.ConfigReturnBufferSizeInKByte, @@ -2474,6 +2476,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } dml32_CalculateSwathAndDETConfiguration( + &v->dummy_vars.dml32_CalculateSwathAndDETConfiguration, mode_lib->vba.DETSizeOverride, mode_lib->vba.UsesMALLForPStateChange, mode_lib->vba.ConfigReturnBufferSizeInKByte, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index c8a3f367d622..da5befd7fdec 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -391,6 +391,7 @@ void dml32_CalculateBytePerPixelAndBlockSizes( } // CalculateBytePerPixelAndBlockSizes void dml32_CalculateSwathAndDETConfiguration( + struct dml32_CalculateSwathAndDETConfiguration *st_vars, unsigned int DETSizeOverride[], enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], unsigned int ConfigReturnBufferSizeInKByte, @@ -455,18 +456,10 @@ void dml32_CalculateSwathAndDETConfiguration( bool ViewportSizeSupportPerSurface[], bool *ViewportSizeSupport) { - unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX]; - unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX]; - unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX]; - unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX]; - unsigned int RoundedUpSwathSizeBytesY; - unsigned int RoundedUpSwathSizeBytesC; - double SwathWidthdoubleDPP[DC__NUM_DPP__MAX]; - double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX]; unsigned int k; - unsigned int TotalActiveDPP = 0; - bool NoChromaSurfaces = true; - unsigned int DETBufferSizeInKByteForSwathCalculation; + + st_vars->TotalActiveDPP = 0; + st_vars->NoChromaSurfaces = true; #ifdef 
__DML_VBA_DEBUG__ dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP); @@ -501,43 +494,43 @@ void dml32_CalculateSwathAndDETConfiguration( DPPPerSurface, /* Output */ - SwathWidthdoubleDPP, - SwathWidthdoubleDPPChroma, + st_vars->SwathWidthdoubleDPP, + st_vars->SwathWidthdoubleDPPChroma, SwathWidth, SwathWidthChroma, - MaximumSwathHeightY, - MaximumSwathHeightC, + st_vars->MaximumSwathHeightY, + st_vars->MaximumSwathHeightC, swath_width_luma_ub, swath_width_chroma_ub); for (k = 0; k < NumberOfActiveSurfaces; ++k) { - RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k]; - RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k]; + st_vars->RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * st_vars->MaximumSwathHeightY[k]; + st_vars->RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * st_vars->MaximumSwathHeightC[k]; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]); dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]); dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]); - dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, MaximumSwathHeightY[k]); + dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, st_vars->MaximumSwathHeightY[k]); dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k, - RoundedUpMaxSwathSizeBytesY[k]); + st_vars->RoundedUpMaxSwathSizeBytesY[k]); dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]); dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]); - dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, MaximumSwathHeightC[k]); + dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, 
st_vars->MaximumSwathHeightC[k]); dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k, - RoundedUpMaxSwathSizeBytesC[k]); + st_vars->RoundedUpMaxSwathSizeBytesC[k]); #endif if (SourcePixelFormat[k] == dm_420_10) { - RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesY[k], 256); - RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesC[k], 256); + st_vars->RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) st_vars->RoundedUpMaxSwathSizeBytesY[k], 256); + st_vars->RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) st_vars->RoundedUpMaxSwathSizeBytesC[k], 256); } } for (k = 0; k < NumberOfActiveSurfaces; ++k) { - TotalActiveDPP = TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]); + st_vars->TotalActiveDPP = st_vars->TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]); if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) { - NoChromaSurfaces = false; + st_vars->NoChromaSurfaces = false; } } @@ -547,10 +540,10 @@ void dml32_CalculateSwathAndDETConfiguration( // if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data // - assume worst-case compression rate of 4. 
[ROB size - 8 * swath_size / max_compression ratio] // - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req - *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (RoundedUpMaxSwathSizeBytesY[0]/512); + *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (st_vars->RoundedUpMaxSwathSizeBytesY[0]/512); if (*CompBufReservedSpaceNeedAdjustment == 1) { - *CompBufReservedSpaceKBytes = ROBSizeKBytes - RoundedUpMaxSwathSizeBytesY[0]/512; + *CompBufReservedSpaceKBytes = ROBSizeKBytes - st_vars->RoundedUpMaxSwathSizeBytesY[0]/512; } #ifdef __DML_VBA_DEBUG__ @@ -558,7 +551,7 @@ void dml32_CalculateSwathAndDETConfiguration( dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, *CompBufReservedSpaceNeedAdjustment); #endif - *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment); + *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, st_vars->TotalActiveDPP, st_vars->NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment); dml32_CalculateDETBufferSize(DETSizeOverride, UseMALLForPStateChange, @@ -573,8 +566,8 @@ void dml32_CalculateSwathAndDETConfiguration( SourcePixelFormat, ReadBandwidthLuma, ReadBandwidthChroma, - RoundedUpMaxSwathSizeBytesY, - RoundedUpMaxSwathSizeBytesC, + st_vars->RoundedUpMaxSwathSizeBytesY, + st_vars->RoundedUpMaxSwathSizeBytesC, DPPPerSurface, /* Output */ @@ -582,7 +575,7 @@ void dml32_CalculateSwathAndDETConfiguration( CompressedBufferSizeInkByte); #ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP); + dml_print("DML::%s: TotalActiveDPP 
= %d\n", __func__, st_vars->TotalActiveDPP); dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte); dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte); dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal); @@ -593,42 +586,42 @@ void dml32_CalculateSwathAndDETConfiguration( *ViewportSizeSupport = true; for (k = 0; k < NumberOfActiveSurfaces; ++k) { - DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] == + st_vars->DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe ? 1024 : DETBufferSizeInKByte[k]); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k, - DETBufferSizeInKByteForSwathCalculation); + st_vars->DETBufferSizeInKByteForSwathCalculation); #endif - if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <= - DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { - SwathHeightY[k] = MaximumSwathHeightY[k]; - SwathHeightC[k] = MaximumSwathHeightC[k]; - RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k]; - RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k]; - } else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] && - RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <= - DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { - SwathHeightY[k] = MaximumSwathHeightY[k] / 2; - SwathHeightC[k] = MaximumSwathHeightC[k]; - RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2; - RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k]; - } else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] && - RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <= - DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { - SwathHeightY[k] = MaximumSwathHeightY[k]; - SwathHeightC[k] = MaximumSwathHeightC[k] / 2; - 
RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k]; - RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2; + if (st_vars->RoundedUpMaxSwathSizeBytesY[k] + st_vars->RoundedUpMaxSwathSizeBytesC[k] <= + st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { + SwathHeightY[k] = st_vars->MaximumSwathHeightY[k]; + SwathHeightC[k] = st_vars->MaximumSwathHeightC[k]; + st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k]; + st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k]; + } else if (st_vars->RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * st_vars->RoundedUpMaxSwathSizeBytesC[k] && + st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2 + st_vars->RoundedUpMaxSwathSizeBytesC[k] <= + st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { + SwathHeightY[k] = st_vars->MaximumSwathHeightY[k] / 2; + SwathHeightC[k] = st_vars->MaximumSwathHeightC[k]; + st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2; + st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k]; + } else if (st_vars->RoundedUpMaxSwathSizeBytesY[k] < 1.5 * st_vars->RoundedUpMaxSwathSizeBytesC[k] && + st_vars->RoundedUpMaxSwathSizeBytesY[k] + st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2 <= + st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { + SwathHeightY[k] = st_vars->MaximumSwathHeightY[k]; + SwathHeightC[k] = st_vars->MaximumSwathHeightC[k] / 2; + st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k]; + st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2; } else { - SwathHeightY[k] = MaximumSwathHeightY[k] / 2; - SwathHeightC[k] = MaximumSwathHeightC[k] / 2; - RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2; - RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2; + SwathHeightY[k] = st_vars->MaximumSwathHeightY[k] / 2; + SwathHeightC[k] = st_vars->MaximumSwathHeightC[k] / 2; + st_vars->RoundedUpSwathSizeBytesY = 
st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2; + st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2; } - if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 > - DETBufferSizeInKByteForSwathCalculation * 1024 / 2) + if ((st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2 + st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2 > + st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) || SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) { *ViewportSizeSupport = false; @@ -643,7 +636,7 @@ void dml32_CalculateSwathAndDETConfiguration( #endif DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024; DETBufferSizeC[k] = 0; - } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) { + } else if (st_vars->RoundedUpSwathSizeBytesY <= 1.5 * st_vars->RoundedUpSwathSizeBytesC) { #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k); #endif @@ -661,11 +654,11 @@ void dml32_CalculateSwathAndDETConfiguration( dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]); dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]); dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, - k, RoundedUpMaxSwathSizeBytesY[k]); + k, st_vars->RoundedUpMaxSwathSizeBytesY[k]); dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, - k, RoundedUpMaxSwathSizeBytesC[k]); - dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, RoundedUpSwathSizeBytesY); - dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, RoundedUpSwathSizeBytesC); + k, st_vars->RoundedUpMaxSwathSizeBytesC[k]); + dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, st_vars->RoundedUpSwathSizeBytesY); + dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, st_vars->RoundedUpSwathSizeBytesC); dml_print("DML::%s: 
k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]); dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]); dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h index d293856ba906..158cfa2af1af 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h @@ -30,6 +30,7 @@ #include "os_types.h" #include "../dc_features.h" #include "../display_mode_structs.h" +#include "dml/display_mode_vba.h" unsigned int dml32_dscceComputeDelay( unsigned int bpc, @@ -81,6 +82,7 @@ void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput( double *DPPCLKUsingSingleDPP); void dml32_CalculateSwathAndDETConfiguration( + struct dml32_CalculateSwathAndDETConfiguration *st_vars, unsigned int DETSizeOverride[], enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], unsigned int ConfigReturnBufferSizeInKByte, diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h index 492aec634b68..6cf814c2cc8d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h @@ -182,6 +182,20 @@ void Calculate256BBlockSizes( unsigned int *BlockWidth256BytesY, unsigned int *BlockWidth256BytesC); +struct dml32_CalculateSwathAndDETConfiguration { + unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX]; + unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX]; + unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX]; + unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX]; + unsigned int RoundedUpSwathSizeBytesY; + unsigned int RoundedUpSwathSizeBytesC; + double SwathWidthdoubleDPP[DC__NUM_DPP__MAX]; + double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX]; + 
unsigned int TotalActiveDPP; + bool NoChromaSurfaces; + unsigned int DETBufferSizeInKByteForSwathCalculation; +}; + struct DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation { unsigned int dummy_integer_array[2][DC__NUM_DPP__MAX]; double dummy_single_array[2][DC__NUM_DPP__MAX]; @@ -253,6 +267,7 @@ struct dummy_vars { struct DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation; struct dml32_ModeSupportAndSystemConfigurationFull dml32_ModeSupportAndSystemConfigurationFull; + struct dml32_CalculateSwathAndDETConfiguration dml32_CalculateSwathAndDETConfiguration; }; struct vba_vars_st { -- cgit v1.2.3 From c3b3f9ba25e6cbe59673505fbc5fff6c4cda0ef7 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 20 Jul 2022 23:25:25 -0400 Subject: drm/amd/display: reduce stack for dml32_CalculateVMRowAndSwath Move stack variables to dummy structure. Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher Cc: Stephen Rothwell --- .../amd/display/dc/dml/dcn32/display_mode_vba_32.c | 2 + .../dc/dml/dcn32/display_mode_vba_util_32.c | 110 +++++++++------------ .../dc/dml/dcn32/display_mode_vba_util_32.h | 1 + .../gpu/drm/amd/display/dc/dml/display_mode_vba.h | 19 ++++ 4 files changed, 70 insertions(+), 62 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index c0e537731c1f..0ecc9e4c52a6 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -461,6 +461,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman { dml32_CalculateVMRowAndSwath( + &v->dummy_vars.dml32_CalculateVMRowAndSwath, mode_lib->vba.NumberOfActiveSurfaces, 
v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters, v->SurfaceSizeInMALL, @@ -2676,6 +2677,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l { dml32_CalculateVMRowAndSwath( + &v->dummy_vars.dml32_CalculateVMRowAndSwath, mode_lib->vba.NumberOfActiveSurfaces, v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters, mode_lib->vba.SurfaceSizeInMALL, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index da5befd7fdec..9ebd3207ce42 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -1867,6 +1867,7 @@ void dml32_CalculateSurfaceSizeInMall( } // CalculateSurfaceSizeInMall void dml32_CalculateVMRowAndSwath( + struct dml32_CalculateVMRowAndSwath *st_vars, unsigned int NumberOfActiveSurfaces, DmlPipe myPipe[], unsigned int SurfaceSizeInMALL[], @@ -1932,21 +1933,6 @@ void dml32_CalculateVMRowAndSwath( unsigned int BIGK_FRAGMENT_SIZE[]) { unsigned int k; - unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX]; - unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX]; - unsigned int PDEAndMetaPTEBytesFrameY; - unsigned int PDEAndMetaPTEBytesFrameC; - unsigned int MetaRowByteY[DC__NUM_DPP__MAX]; - unsigned int MetaRowByteC[DC__NUM_DPP__MAX]; - unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX]; - unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX]; - unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX]; - unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX]; - unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX]; - unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX]; - unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX]; - unsigned int 
dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX]; - bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX]; for (k = 0; k < NumberOfActiveSurfaces; ++k) { if (HostVMEnable == true) { @@ -1968,15 +1954,15 @@ void dml32_CalculateVMRowAndSwath( myPipe[k].SourcePixelFormat == dm_rgbe_alpha) { if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) && !IsVertical(myPipe[k].SourceRotation)) { - PTEBufferSizeInRequestsForLuma[k] = + st_vars->PTEBufferSizeInRequestsForLuma[k] = (PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2; - PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsForLuma[k]; + st_vars->PTEBufferSizeInRequestsForChroma[k] = st_vars->PTEBufferSizeInRequestsForLuma[k]; } else { - PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma; - PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma; + st_vars->PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma; + st_vars->PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma; } - PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes( + st_vars->PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes( myPipe[k].ViewportStationary, myPipe[k].DCCEnable, myPipe[k].DPPPerSurface, @@ -1996,21 +1982,21 @@ void dml32_CalculateVMRowAndSwath( GPUVMMaxPageTableLevels, GPUVMMinPageSizeKBytes[k], HostVMMinPageSize, - PTEBufferSizeInRequestsForChroma[k], + st_vars->PTEBufferSizeInRequestsForChroma[k], myPipe[k].PitchC, myPipe[k].DCCMetaPitchC, myPipe[k].BlockWidthC, myPipe[k].BlockHeightC, /* Output */ - &MetaRowByteC[k], - &PixelPTEBytesPerRowC[k], + &st_vars->MetaRowByteC[k], + &st_vars->PixelPTEBytesPerRowC[k], &dpte_row_width_chroma_ub[k], &dpte_row_height_chroma[k], &dpte_row_height_linear_chroma[k], - &PixelPTEBytesPerRowC_one_row_per_frame[k], - &dpte_row_width_chroma_ub_one_row_per_frame[k], - &dpte_row_height_chroma_one_row_per_frame[k], + &st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k], + 
&st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k], + &st_vars->dpte_row_height_chroma_one_row_per_frame[k], &meta_req_width_chroma[k], &meta_req_height_chroma[k], &meta_row_width_chroma[k], @@ -2038,19 +2024,19 @@ void dml32_CalculateVMRowAndSwath( &VInitPreFillC[k], &MaxNumSwathC[k]); } else { - PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma; - PTEBufferSizeInRequestsForChroma[k] = 0; - PixelPTEBytesPerRowC[k] = 0; - PDEAndMetaPTEBytesFrameC = 0; - MetaRowByteC[k] = 0; + st_vars->PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma; + st_vars->PTEBufferSizeInRequestsForChroma[k] = 0; + st_vars->PixelPTEBytesPerRowC[k] = 0; + st_vars->PDEAndMetaPTEBytesFrameC = 0; + st_vars->MetaRowByteC[k] = 0; MaxNumSwathC[k] = 0; PrefetchSourceLinesC[k] = 0; - dpte_row_height_chroma_one_row_per_frame[k] = 0; - dpte_row_width_chroma_ub_one_row_per_frame[k] = 0; - PixelPTEBytesPerRowC_one_row_per_frame[k] = 0; + st_vars->dpte_row_height_chroma_one_row_per_frame[k] = 0; + st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k] = 0; + st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k] = 0; } - PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes( + st_vars->PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes( myPipe[k].ViewportStationary, myPipe[k].DCCEnable, myPipe[k].DPPPerSurface, @@ -2070,21 +2056,21 @@ void dml32_CalculateVMRowAndSwath( GPUVMMaxPageTableLevels, GPUVMMinPageSizeKBytes[k], HostVMMinPageSize, - PTEBufferSizeInRequestsForLuma[k], + st_vars->PTEBufferSizeInRequestsForLuma[k], myPipe[k].PitchY, myPipe[k].DCCMetaPitchY, myPipe[k].BlockWidthY, myPipe[k].BlockHeightY, /* Output */ - &MetaRowByteY[k], - &PixelPTEBytesPerRowY[k], + &st_vars->MetaRowByteY[k], + &st_vars->PixelPTEBytesPerRowY[k], &dpte_row_width_luma_ub[k], &dpte_row_height_luma[k], &dpte_row_height_linear_luma[k], - &PixelPTEBytesPerRowY_one_row_per_frame[k], - 
&dpte_row_width_luma_ub_one_row_per_frame[k], - &dpte_row_height_luma_one_row_per_frame[k], + &st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k], + &st_vars->dpte_row_width_luma_ub_one_row_per_frame[k], + &st_vars->dpte_row_height_luma_one_row_per_frame[k], &meta_req_width[k], &meta_req_height[k], &meta_row_width[k], @@ -2112,19 +2098,19 @@ void dml32_CalculateVMRowAndSwath( &VInitPreFillY[k], &MaxNumSwathY[k]); - PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC; - MetaRowByte[k] = MetaRowByteY[k] + MetaRowByteC[k]; + PDEAndMetaPTEBytesFrame[k] = st_vars->PDEAndMetaPTEBytesFrameY + st_vars->PDEAndMetaPTEBytesFrameC; + MetaRowByte[k] = st_vars->MetaRowByteY[k] + st_vars->MetaRowByteC[k]; - if (PixelPTEBytesPerRowY[k] <= 64 * PTEBufferSizeInRequestsForLuma[k] && - PixelPTEBytesPerRowC[k] <= 64 * PTEBufferSizeInRequestsForChroma[k]) { + if (st_vars->PixelPTEBytesPerRowY[k] <= 64 * st_vars->PTEBufferSizeInRequestsForLuma[k] && + st_vars->PixelPTEBytesPerRowC[k] <= 64 * st_vars->PTEBufferSizeInRequestsForChroma[k]) { PTEBufferSizeNotExceeded[k] = true; } else { PTEBufferSizeNotExceeded[k] = false; } - one_row_per_frame_fits_in_buffer[k] = (PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * - PTEBufferSizeInRequestsForLuma[k] && - PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * PTEBufferSizeInRequestsForChroma[k]); + st_vars->one_row_per_frame_fits_in_buffer[k] = (st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * + st_vars->PTEBufferSizeInRequestsForLuma[k] && + st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * st_vars->PTEBufferSizeInRequestsForChroma[k]); } dml32_CalculateMALLUseForStaticScreen( @@ -2132,7 +2118,7 @@ void dml32_CalculateVMRowAndSwath( MALLAllocatedForDCN, UseMALLForStaticScreen, // mode SurfaceSizeInMALL, - one_row_per_frame_fits_in_buffer, + st_vars->one_row_per_frame_fits_in_buffer, /* Output */ UsesMALLForStaticScreen); // boolen @@ -2158,13 +2144,13 @@ void 
dml32_CalculateVMRowAndSwath( !(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame); if (use_one_row_for_frame[k]) { - dpte_row_height_luma[k] = dpte_row_height_luma_one_row_per_frame[k]; - dpte_row_width_luma_ub[k] = dpte_row_width_luma_ub_one_row_per_frame[k]; - PixelPTEBytesPerRowY[k] = PixelPTEBytesPerRowY_one_row_per_frame[k]; - dpte_row_height_chroma[k] = dpte_row_height_chroma_one_row_per_frame[k]; - dpte_row_width_chroma_ub[k] = dpte_row_width_chroma_ub_one_row_per_frame[k]; - PixelPTEBytesPerRowC[k] = PixelPTEBytesPerRowC_one_row_per_frame[k]; - PTEBufferSizeNotExceeded[k] = one_row_per_frame_fits_in_buffer[k]; + dpte_row_height_luma[k] = st_vars->dpte_row_height_luma_one_row_per_frame[k]; + dpte_row_width_luma_ub[k] = st_vars->dpte_row_width_luma_ub_one_row_per_frame[k]; + st_vars->PixelPTEBytesPerRowY[k] = st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k]; + dpte_row_height_chroma[k] = st_vars->dpte_row_height_chroma_one_row_per_frame[k]; + dpte_row_width_chroma_ub[k] = st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k]; + st_vars->PixelPTEBytesPerRowC[k] = st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k]; + PTEBufferSizeNotExceeded[k] = st_vars->one_row_per_frame_fits_in_buffer[k]; } if (MetaRowByte[k] <= DCCMetaBufferSizeBytes) @@ -2172,7 +2158,7 @@ void dml32_CalculateVMRowAndSwath( else DCCMetaBufferSizeNotExceeded[k] = false; - PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY[k] + PixelPTEBytesPerRowC[k]; + PixelPTEBytesPerRow[k] = st_vars->PixelPTEBytesPerRowY[k] + st_vars->PixelPTEBytesPerRowC[k]; if (use_one_row_for_frame[k]) PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2; @@ -2183,11 +2169,11 @@ void dml32_CalculateVMRowAndSwath( myPipe[k].VRatioChroma, myPipe[k].DCCEnable, myPipe[k].HTotal / myPipe[k].PixelClock, - MetaRowByteY[k], MetaRowByteC[k], + st_vars->MetaRowByteY[k], st_vars->MetaRowByteC[k], meta_row_height[k], meta_row_height_chroma[k], - PixelPTEBytesPerRowY[k], - PixelPTEBytesPerRowC[k], + 
st_vars->PixelPTEBytesPerRowY[k], + st_vars->PixelPTEBytesPerRowC[k], dpte_row_height_luma[k], dpte_row_height_chroma[k], @@ -2203,12 +2189,12 @@ void dml32_CalculateVMRowAndSwath( dml_print("DML::%s: k=%d, dpte_row_height_luma = %d\n", __func__, k, dpte_row_height_luma[k]); dml_print("DML::%s: k=%d, dpte_row_width_luma_ub = %d\n", __func__, k, dpte_row_width_luma_ub[k]); - dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY = %d\n", __func__, k, PixelPTEBytesPerRowY[k]); + dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY = %d\n", __func__, k, st_vars->PixelPTEBytesPerRowY[k]); dml_print("DML::%s: k=%d, dpte_row_height_chroma = %d\n", __func__, k, dpte_row_height_chroma[k]); dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub = %d\n", __func__, k, dpte_row_width_chroma_ub[k]); - dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC = %d\n", __func__, k, PixelPTEBytesPerRowC[k]); + dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC = %d\n", __func__, k, st_vars->PixelPTEBytesPerRowC[k]); dml_print("DML::%s: k=%d, PixelPTEBytesPerRow = %d\n", __func__, k, PixelPTEBytesPerRow[k]); dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded = %d\n", __func__, k, PTEBufferSizeNotExceeded[k]); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h index 158cfa2af1af..974006e5ecb7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h @@ -362,6 +362,7 @@ void dml32_CalculateSurfaceSizeInMall( bool *ExceededMALLSize); void dml32_CalculateVMRowAndSwath( + struct dml32_CalculateVMRowAndSwath *st_vars, unsigned int NumberOfActiveSurfaces, DmlPipe myPipe[], unsigned int SurfaceSizeInMALL[], diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h index 6cf814c2cc8d..b326184cfa4a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h 
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h @@ -196,6 +196,24 @@ struct dml32_CalculateSwathAndDETConfiguration { unsigned int DETBufferSizeInKByteForSwathCalculation; }; +struct dml32_CalculateVMRowAndSwath { + unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX]; + unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX]; + unsigned int PDEAndMetaPTEBytesFrameY; + unsigned int PDEAndMetaPTEBytesFrameC; + unsigned int MetaRowByteY[DC__NUM_DPP__MAX]; + unsigned int MetaRowByteC[DC__NUM_DPP__MAX]; + unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX]; + unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX]; + unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX]; + unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX]; + unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX]; + unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX]; + unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX]; + unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX]; + bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX]; +}; + struct DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation { unsigned int dummy_integer_array[2][DC__NUM_DPP__MAX]; double dummy_single_array[2][DC__NUM_DPP__MAX]; @@ -268,6 +286,7 @@ struct dummy_vars { DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation; struct dml32_ModeSupportAndSystemConfigurationFull dml32_ModeSupportAndSystemConfigurationFull; struct dml32_CalculateSwathAndDETConfiguration dml32_CalculateSwathAndDETConfiguration; + struct dml32_CalculateVMRowAndSwath dml32_CalculateVMRowAndSwath; }; struct vba_vars_st { -- cgit v1.2.3 From 3c3abac60117cfd09460980d9a14c253b37f7b00 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 20 Jul 2022 23:50:08 -0400 Subject: drm/amd/display: reduce stack for dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport Move 
stack variables to dummy structure. Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher Cc: Stephen Rothwell --- .../amd/display/dc/dml/dcn32/display_mode_vba_32.c | 2 + .../dc/dml/dcn32/display_mode_vba_util_32.c | 187 +++++++++------------ .../dc/dml/dcn32/display_mode_vba_util_32.h | 1 + .../gpu/drm/amd/display/dc/dml/display_mode_vba.h | 34 ++++ 4 files changed, 120 insertions(+), 104 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index 0ecc9e4c52a6..f7d108123b07 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -1165,6 +1165,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SMNLatency = mode_lib->vba.SMNLatency; dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( + &v->dummy_vars.dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport, mode_lib->vba.USRRetrainingRequiredFinal, mode_lib->vba.UsesMALLForPStateChange, mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb], @@ -3493,6 +3494,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l { dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( + &v->dummy_vars.dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport, mode_lib->vba.USRRetrainingRequiredFinal, mode_lib->vba.UsesMALLForPStateChange, mode_lib->vba.PrefetchModePerState[i][j], diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index 9ebd3207ce42..40b4c88ff2e7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ 
-4185,6 +4185,7 @@ void dml32_CalculateFlipSchedule( } // CalculateFlipSchedule void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( + struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport *st_vars, bool USRRetrainingRequiredFinal, enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], unsigned int PrefetchMode, @@ -4246,37 +4247,15 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( double ActiveDRAMClockChangeLatencyMargin[]) { unsigned int i, j, k; - unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0; - unsigned int DRAMClockChangeSupportNumber = 0; - unsigned int LastSurfaceWithoutMargin; - unsigned int DRAMClockChangeMethod = 0; - bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false; - double MinActiveFCLKChangeMargin = 0.; - double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.; - double ActiveClockChangeLatencyHidingY; - double ActiveClockChangeLatencyHidingC; - double ActiveClockChangeLatencyHiding; - double EffectiveDETBufferSizeY; - double ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX]; - double USRRetrainingLatencyMargin[DC__NUM_DPP__MAX]; - double TotalPixelBW = 0.0; - bool SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX]; - double EffectiveLBLatencyHidingY; - double EffectiveLBLatencyHidingC; - double LinesInDETY[DC__NUM_DPP__MAX]; - double LinesInDETC[DC__NUM_DPP__MAX]; - unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX]; - unsigned int LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX]; - double FullDETBufferingTimeY; - double FullDETBufferingTimeC; - double WritebackDRAMClockChangeLatencyMargin; - double WritebackFCLKChangeLatencyMargin; - double WritebackLatencyHiding; - bool SameTimingForFCLKChange; - - unsigned int TotalActiveWriteback = 0; - unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX]; - unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX]; + + st_vars->SurfaceWithMinActiveFCLKChangeMargin = 0; + st_vars->DRAMClockChangeSupportNumber = 0; + 
st_vars->DRAMClockChangeMethod = 0; + st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false; + st_vars->MinActiveFCLKChangeMargin = 0.; + st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.; + st_vars->TotalPixelBW = 0.0; + st_vars->TotalActiveWriteback = 0; Watermark->UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency; Watermark->USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency @@ -4308,13 +4287,13 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( #endif - TotalActiveWriteback = 0; + st_vars->TotalActiveWriteback = 0; for (k = 0; k < NumberOfActiveSurfaces; ++k) { if (WritebackEnable[k] == true) - TotalActiveWriteback = TotalActiveWriteback + 1; + st_vars->TotalActiveWriteback = st_vars->TotalActiveWriteback + 1; } - if (TotalActiveWriteback <= 1) { + if (st_vars->TotalActiveWriteback <= 1) { Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency; } else { Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency @@ -4324,7 +4303,7 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( Watermark->WritebackUrgentWatermark = Watermark->WritebackUrgentWatermark + mmSOCParameters.USRRetrainingLatency; - if (TotalActiveWriteback <= 1) { + if (st_vars->TotalActiveWriteback <= 1) { Watermark->WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.WritebackLatency; Watermark->WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency @@ -4354,14 +4333,14 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( #endif for (k = 0; k < NumberOfActiveSurfaces; ++k) { - TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] + + st_vars->TotalPixelBW = st_vars->TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k]) / (HTotal[k] / PixelClock[k]); } for (k 
= 0; k < NumberOfActiveSurfaces; ++k) { - LBLatencyHidingSourceLinesY[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (VTaps[k] - 1); - LBLatencyHidingSourceLinesC[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTapsChroma[k] - 1); + st_vars->LBLatencyHidingSourceLinesY[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (VTaps[k] - 1); + st_vars->LBLatencyHidingSourceLinesC[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTapsChroma[k] - 1); #ifdef __DML_VBA_DEBUG__ @@ -4372,72 +4351,72 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( dml_print("DML::%s: k=%d, VTaps = %d\n", __func__, k, VTaps[k]); #endif - EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / VRatio[k] * (HTotal[k] / PixelClock[k]); - EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / VRatioChroma[k] * (HTotal[k] / PixelClock[k]); - EffectiveDETBufferSizeY = DETBufferSizeY[k]; + st_vars->EffectiveLBLatencyHidingY = st_vars->LBLatencyHidingSourceLinesY[k] / VRatio[k] * (HTotal[k] / PixelClock[k]); + st_vars->EffectiveLBLatencyHidingC = st_vars->LBLatencyHidingSourceLinesC[k] / VRatioChroma[k] * (HTotal[k] / PixelClock[k]); + st_vars->EffectiveDETBufferSizeY = DETBufferSizeY[k]; if (UnboundedRequestEnabled) { - EffectiveDETBufferSizeY = EffectiveDETBufferSizeY + st_vars->EffectiveDETBufferSizeY = st_vars->EffectiveDETBufferSizeY + CompressedBufferSizeInkByte * 1024 * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k]) - / (HTotal[k] / PixelClock[k]) / TotalPixelBW; + / (HTotal[k] / PixelClock[k]) / st_vars->TotalPixelBW; } - LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / 
SwathWidthY[k]; - LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]); - FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k]; + st_vars->LinesInDETY[k] = (double) st_vars->EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k]; + st_vars->LinesInDETYRoundedDownToSwath[k] = dml_floor(st_vars->LinesInDETY[k], SwathHeightY[k]); + st_vars->FullDETBufferingTimeY = st_vars->LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k]; - ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY + st_vars->ActiveClockChangeLatencyHidingY = st_vars->EffectiveLBLatencyHidingY + st_vars->FullDETBufferingTimeY - (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k]; if (NumberOfActiveSurfaces > 1) { - ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY + st_vars->ActiveClockChangeLatencyHidingY = st_vars->ActiveClockChangeLatencyHidingY - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k]; } if (BytePerPixelDETC[k] > 0) { - LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k]; - LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]); - FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) + st_vars->LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k]; + st_vars->LinesInDETCRoundedDownToSwath[k] = dml_floor(st_vars->LinesInDETC[k], SwathHeightC[k]); + st_vars->FullDETBufferingTimeC = st_vars->LinesInDETCRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatioChroma[k]; - ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC + st_vars->ActiveClockChangeLatencyHidingC = st_vars->EffectiveLBLatencyHidingC + st_vars->FullDETBufferingTimeC - (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k]; if (NumberOfActiveSurfaces > 
1) { - ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC + st_vars->ActiveClockChangeLatencyHidingC = st_vars->ActiveClockChangeLatencyHidingC - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / VRatioChroma[k]; } - ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY, - ActiveClockChangeLatencyHidingC); + st_vars->ActiveClockChangeLatencyHiding = dml_min(st_vars->ActiveClockChangeLatencyHidingY, + st_vars->ActiveClockChangeLatencyHidingC); } else { - ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY; + st_vars->ActiveClockChangeLatencyHiding = st_vars->ActiveClockChangeLatencyHidingY; } - ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark + ActiveDRAMClockChangeLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark - Watermark->DRAMClockChangeWatermark; - ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark + st_vars->ActiveFCLKChangeLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark - Watermark->FCLKChangeWatermark; - USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->USRRetrainingWatermark; + st_vars->USRRetrainingLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->USRRetrainingWatermark; if (WritebackEnable[k]) { - WritebackLatencyHiding = WritebackInterfaceBufferSize * 1024 + st_vars->WritebackLatencyHiding = WritebackInterfaceBufferSize * 1024 / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4); if (WritebackPixelFormat[k] == dm_444_64) - WritebackLatencyHiding = WritebackLatencyHiding / 2; + st_vars->WritebackLatencyHiding = st_vars->WritebackLatencyHiding / 2; - WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding + st_vars->WritebackDRAMClockChangeLatencyMargin = 
st_vars->WritebackLatencyHiding - Watermark->WritebackDRAMClockChangeWatermark; - WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding + st_vars->WritebackFCLKChangeLatencyMargin = st_vars->WritebackLatencyHiding - Watermark->WritebackFCLKChangeWatermark; ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k], - WritebackFCLKChangeLatencyMargin); - ActiveFCLKChangeLatencyMargin[k] = dml_min(ActiveFCLKChangeLatencyMargin[k], - WritebackDRAMClockChangeLatencyMargin); + st_vars->WritebackFCLKChangeLatencyMargin); + st_vars->ActiveFCLKChangeLatencyMargin[k] = dml_min(st_vars->ActiveFCLKChangeLatencyMargin[k], + st_vars->WritebackDRAMClockChangeLatencyMargin); } MaxActiveDRAMClockChangeLatencySupported[k] = (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ? @@ -4456,41 +4435,41 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( HTotal[i] == HTotal[j] && VTotal[i] == VTotal[j] && VActive[i] == VActive[j]) || (SynchronizeDRRDisplaysForUCLKPStateChangeFinal && (DRRDisplay[i] || DRRDisplay[j]))) { - SynchronizedSurfaces[i][j] = true; + st_vars->SynchronizedSurfaces[i][j] = true; } else { - SynchronizedSurfaces[i][j] = false; + st_vars->SynchronizedSurfaces[i][j] = false; } } } for (k = 0; k < NumberOfActiveSurfaces; ++k) { if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && - (!FoundFirstSurfaceWithMinActiveFCLKChangeMargin || - ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) { - FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true; - MinActiveFCLKChangeMargin = ActiveFCLKChangeLatencyMargin[k]; - SurfaceWithMinActiveFCLKChangeMargin = k; + (!st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin || + st_vars->ActiveFCLKChangeLatencyMargin[k] < st_vars->MinActiveFCLKChangeMargin)) { + st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true; + st_vars->MinActiveFCLKChangeMargin = st_vars->ActiveFCLKChangeLatencyMargin[k]; + 
st_vars->SurfaceWithMinActiveFCLKChangeMargin = k; } } - *MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency; + *MinActiveFCLKChangeLatencySupported = st_vars->MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency; - SameTimingForFCLKChange = true; + st_vars->SameTimingForFCLKChange = true; for (k = 0; k < NumberOfActiveSurfaces; ++k) { - if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) { + if (!st_vars->SynchronizedSurfaces[k][st_vars->SurfaceWithMinActiveFCLKChangeMargin]) { if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && - (SameTimingForFCLKChange || - ActiveFCLKChangeLatencyMargin[k] < - SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) { - SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = ActiveFCLKChangeLatencyMargin[k]; + (st_vars->SameTimingForFCLKChange || + st_vars->ActiveFCLKChangeLatencyMargin[k] < + st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) { + st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = st_vars->ActiveFCLKChangeLatencyMargin[k]; } - SameTimingForFCLKChange = false; + st_vars->SameTimingForFCLKChange = false; } } - if (MinActiveFCLKChangeMargin > 0) { + if (st_vars->MinActiveFCLKChangeMargin > 0) { *FCLKChangeSupport = dm_fclock_change_vactive; - } else if ((SameTimingForFCLKChange || SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) && + } else if ((st_vars->SameTimingForFCLKChange || st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) && (PrefetchMode <= 1)) { *FCLKChangeSupport = dm_fclock_change_vblank; } else { @@ -4500,7 +4479,7 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( *USRRetrainingSupport = true; for (k = 0; k < NumberOfActiveSurfaces; ++k) { if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && - (USRRetrainingLatencyMargin[k] < 0)) { + (st_vars->USRRetrainingLatencyMargin[k] < 0)) { *USRRetrainingSupport = false; } } @@ -4511,42 
+4490,42 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe && ActiveDRAMClockChangeLatencyMargin[k] < 0) { if (PrefetchMode > 0) { - DRAMClockChangeSupportNumber = 2; - } else if (DRAMClockChangeSupportNumber == 0) { - DRAMClockChangeSupportNumber = 1; - LastSurfaceWithoutMargin = k; - } else if (DRAMClockChangeSupportNumber == 1 && - !SynchronizedSurfaces[LastSurfaceWithoutMargin][k]) { - DRAMClockChangeSupportNumber = 2; + st_vars->DRAMClockChangeSupportNumber = 2; + } else if (st_vars->DRAMClockChangeSupportNumber == 0) { + st_vars->DRAMClockChangeSupportNumber = 1; + st_vars->LastSurfaceWithoutMargin = k; + } else if (st_vars->DRAMClockChangeSupportNumber == 1 && + !st_vars->SynchronizedSurfaces[st_vars->LastSurfaceWithoutMargin][k]) { + st_vars->DRAMClockChangeSupportNumber = 2; } } } for (k = 0; k < NumberOfActiveSurfaces; ++k) { if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame) - DRAMClockChangeMethod = 1; + st_vars->DRAMClockChangeMethod = 1; else if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) - DRAMClockChangeMethod = 2; + st_vars->DRAMClockChangeMethod = 2; } - if (DRAMClockChangeMethod == 0) { - if (DRAMClockChangeSupportNumber == 0) + if (st_vars->DRAMClockChangeMethod == 0) { + if (st_vars->DRAMClockChangeSupportNumber == 0) *DRAMClockChangeSupport = dm_dram_clock_change_vactive; - else if (DRAMClockChangeSupportNumber == 1) + else if (st_vars->DRAMClockChangeSupportNumber == 1) *DRAMClockChangeSupport = dm_dram_clock_change_vblank; else *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; - } else if (DRAMClockChangeMethod == 1) { - if (DRAMClockChangeSupportNumber == 0) + } else if (st_vars->DRAMClockChangeMethod == 1) { + if (st_vars->DRAMClockChangeSupportNumber == 0) *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame; - else if (DRAMClockChangeSupportNumber == 1) + else if 
(st_vars->DRAMClockChangeSupportNumber == 1) *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame; else *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; } else { - if (DRAMClockChangeSupportNumber == 0) + if (st_vars->DRAMClockChangeSupportNumber == 0) *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp; - else if (DRAMClockChangeSupportNumber == 1) + else if (st_vars->DRAMClockChangeSupportNumber == 1) *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp; else *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; @@ -4560,7 +4539,7 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (HTotal[k] / PixelClock[k]), 1); src_y_pstate_l = dml_ceil(dst_y_pstate * VRatio[k], SwathHeightY[k]); - src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k]; + src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + st_vars->LBLatencyHidingSourceLinesY[k]; sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + meta_row_height[k]; #ifdef __DML_VBA_DEBUG__ @@ -4568,7 +4547,7 @@ dml_print("DML::%s: k=%d, DETBufferSizeY = %d\n", __func__, k, DET dml_print("DML::%s: k=%d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]); dml_print("DML::%s: k=%d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]); dml_print("DML::%s: k=%d, SwathHeightY = %d\n", __func__, k, SwathHeightY[k]); -dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY = %d\n", __func__, k, LBLatencyHidingSourceLinesY[k]); +dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY = %d\n", __func__, k, st_vars->LBLatencyHidingSourceLinesY[k]); dml_print("DML::%s: k=%d, dst_y_pstate = %d\n", __func__, k, dst_y_pstate); dml_print("DML::%s: k=%d, src_y_pstate_l = %d\n", __func__, k, src_y_pstate_l); dml_print("DML::%s: k=%d, 
src_y_ahead_l = %d\n", __func__, k, src_y_ahead_l); @@ -4579,7 +4558,7 @@ dml_print("DML::%s: k=%d, sub_vp_lines_l = %d\n", __func__, k, sub_vp_lines_l if (BytePerPixelDETC[k] > 0) { src_y_pstate_c = dml_ceil(dst_y_pstate * VRatioChroma[k], SwathHeightC[k]); - src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k]; + src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + st_vars->LBLatencyHidingSourceLinesC[k]; sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + meta_row_height_chroma[k]; SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h index 974006e5ecb7..ecd4f1e7d1f7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h @@ -810,6 +810,7 @@ void dml32_CalculateFlipSchedule( bool *ImmediateFlipSupportedForPipe); void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( + struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport *st_vars, bool USRRetrainingRequiredFinal, enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], unsigned int PrefetchMode, diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h index b326184cfa4a..42e4e4c5e656 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h @@ -214,6 +214,39 @@ struct dml32_CalculateVMRowAndSwath { bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX]; }; +struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport { + unsigned int SurfaceWithMinActiveFCLKChangeMargin; + unsigned int DRAMClockChangeSupportNumber; + unsigned int 
LastSurfaceWithoutMargin; + unsigned int DRAMClockChangeMethod; + bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin; + double MinActiveFCLKChangeMargin; + double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank; + double ActiveClockChangeLatencyHidingY; + double ActiveClockChangeLatencyHidingC; + double ActiveClockChangeLatencyHiding; + double EffectiveDETBufferSizeY; + double ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX]; + double USRRetrainingLatencyMargin[DC__NUM_DPP__MAX]; + double TotalPixelBW; + bool SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX]; + double EffectiveLBLatencyHidingY; + double EffectiveLBLatencyHidingC; + double LinesInDETY[DC__NUM_DPP__MAX]; + double LinesInDETC[DC__NUM_DPP__MAX]; + unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX]; + unsigned int LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX]; + double FullDETBufferingTimeY; + double FullDETBufferingTimeC; + double WritebackDRAMClockChangeLatencyMargin; + double WritebackFCLKChangeLatencyMargin; + double WritebackLatencyHiding; + bool SameTimingForFCLKChange; + unsigned int TotalActiveWriteback; + unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX]; + unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX]; +}; + struct DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation { unsigned int dummy_integer_array[2][DC__NUM_DPP__MAX]; double dummy_single_array[2][DC__NUM_DPP__MAX]; @@ -287,6 +320,7 @@ struct dummy_vars { struct dml32_ModeSupportAndSystemConfigurationFull dml32_ModeSupportAndSystemConfigurationFull; struct dml32_CalculateSwathAndDETConfiguration dml32_CalculateSwathAndDETConfiguration; struct dml32_CalculateVMRowAndSwath dml32_CalculateVMRowAndSwath; + struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport; }; struct vba_vars_st { -- cgit v1.2.3 From 86e4863e67a9bd1e257f162f3d740ebb61206c91 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 21 Jul 
2022 00:35:35 -0400 Subject: drm/amd/display: reduce stack for dml32_CalculatePrefetchSchedule Move stack variables to dummy structure. Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher Cc: Stephen Rothwell --- .../amd/display/dc/dml/dcn32/display_mode_vba_32.c | 5 +- .../dc/dml/dcn32/display_mode_vba_util_32.c | 394 ++++++++++----------- .../dc/dml/dcn32/display_mode_vba_util_32.h | 1 + .../gpu/drm/amd/display/dc/dml/display_mode_vba.h | 38 ++ 4 files changed, 227 insertions(+), 211 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index f7d108123b07..db3e43499a26 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -757,7 +757,9 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BytePerPixelY = v->BytePerPixelY[k]; v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BytePerPixelC = v->BytePerPixelC[k]; v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.ProgressiveToInterlaceUnitInOPP = mode_lib->vba.ProgressiveToInterlaceUnitInOPP; - v->ErrorResult[k] = dml32_CalculatePrefetchSchedule(v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor, + v->ErrorResult[k] = dml32_CalculatePrefetchSchedule( + &v->dummy_vars.dml32_CalculatePrefetchSchedule, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor, &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe, v->DSCDelay[k], mode_lib->vba.DPPCLKDelaySubtotal + mode_lib->vba.DPPCLKDelayCNVCFormater, 
mode_lib->vba.DPPCLKDelaySCL, @@ -3195,6 +3197,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.NoTimeForPrefetch[i][j][k] = dml32_CalculatePrefetchSchedule( + &v->dummy_vars.dml32_CalculatePrefetchSchedule, v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor, &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe, mode_lib->vba.DSCDelayPerState[i][k], diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index 40b4c88ff2e7..4b010b1b8aed 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -3342,6 +3342,7 @@ double dml32_CalculateExtraLatency( } // CalculateExtraLatency bool dml32_CalculatePrefetchSchedule( + struct dml32_CalculatePrefetchSchedule *st_vars, double HostVMInefficiencyFactor, DmlPipe *myPipe, unsigned int DSCDelay, @@ -3405,45 +3406,18 @@ bool dml32_CalculatePrefetchSchedule( double *VReadyOffsetPix) { bool MyError = false; - unsigned int DPPCycles, DISPCLKCycles; - double DSTTotalPixelsAfterScaler; - double LineTime; - double dst_y_prefetch_equ; - double prefetch_bw_oto; - double Tvm_oto; - double Tr0_oto; - double Tvm_oto_lines; - double Tr0_oto_lines; - double dst_y_prefetch_oto; - double TimeForFetchingMetaPTE = 0; - double TimeForFetchingRowInVBlank = 0; - double LinesToRequestPrefetchPixelData = 0; - unsigned int HostVMDynamicLevelsTrips; - double trip_to_mem; - double Tvm_trips; - double Tr0_trips; - double Tvm_trips_rounded; - double Tr0_trips_rounded; - double Lsw_oto; - double Tpre_rounded; - double prefetch_bw_equ; - double Tvm_equ; - double Tr0_equ; - double Tdmbf; - double Tdmec; - double Tdmsks; - double prefetch_sw_bytes; - double bytes_pp; - double dep_bytes; - unsigned int max_vratio_pre = __DML_MAX_VRATIO_PRE__; - double min_Lsw; - double 
Tsw_est1 = 0; - double Tsw_est3 = 0; + + st_vars->TimeForFetchingMetaPTE = 0; + st_vars->TimeForFetchingRowInVBlank = 0; + st_vars->LinesToRequestPrefetchPixelData = 0; + st_vars->max_vratio_pre = __DML_MAX_VRATIO_PRE__; + st_vars->Tsw_est1 = 0; + st_vars->Tsw_est3 = 0; if (GPUVMEnable == true && HostVMEnable == true) - HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; + st_vars->HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; else - HostVMDynamicLevelsTrips = 0; + st_vars->HostVMDynamicLevelsTrips = 0; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable); dml_print("DML::%s: GPUVMPageTableLevels = %d\n", __func__, GPUVMPageTableLevels); @@ -3466,19 +3440,19 @@ bool dml32_CalculatePrefetchSchedule( TSetup, /* output */ - &Tdmbf, - &Tdmec, - &Tdmsks, + &st_vars->Tdmbf, + &st_vars->Tdmec, + &st_vars->Tdmsks, VUpdateOffsetPix, VUpdateWidthPix, VReadyOffsetPix); - LineTime = myPipe->HTotal / myPipe->PixelClock; - trip_to_mem = UrgentLatency; - Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1); + st_vars->LineTime = myPipe->HTotal / myPipe->PixelClock; + st_vars->trip_to_mem = UrgentLatency; + st_vars->Tvm_trips = UrgentExtraLatency + st_vars->trip_to_mem * (GPUVMPageTableLevels * (st_vars->HostVMDynamicLevelsTrips + 1) - 1); if (DynamicMetadataVMEnabled == true) - *Tdmdl = TWait + Tvm_trips + trip_to_mem; + *Tdmdl = TWait + st_vars->Tvm_trips + st_vars->trip_to_mem; else *Tdmdl = TWait + UrgentExtraLatency; @@ -3488,15 +3462,15 @@ bool dml32_CalculatePrefetchSchedule( #endif if (DynamicMetadataEnable == true) { - if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) { + if (VStartup * st_vars->LineTime < *TSetup + *Tdmdl + st_vars->Tdmbf + st_vars->Tdmec + st_vars->Tdmsks) { *NotEnoughTimeForDynamicMetadata = true; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__); dml_print("DML::%s: Tdmbf: 
%fus - time for dmd transfer from dchub to dio output buffer\n", - __func__, Tdmbf); - dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec); + __func__, st_vars->Tdmbf); + dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, st_vars->Tdmec); dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", - __func__, Tdmsks); + __func__, st_vars->Tdmsks); dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl); #endif @@ -3508,21 +3482,21 @@ bool dml32_CalculatePrefetchSchedule( } *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && - GPUVMEnable == true ? TWait + Tvm_trips : 0); + GPUVMEnable == true ? TWait + st_vars->Tvm_trips : 0); if (myPipe->ScalerEnabled) - DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL; + st_vars->DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL; else - DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly; + st_vars->DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly; - DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor; + st_vars->DPPCycles = st_vars->DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor; - DISPCLKCycles = DISPCLKDelaySubtotal; + st_vars->DISPCLKCycles = DISPCLKDelaySubtotal; if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0) return true; - *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles * + *DSTXAfterScaler = st_vars->DPPCycles * myPipe->PixelClock / myPipe->Dppclk + st_vars->DISPCLKCycles * myPipe->PixelClock / myPipe->Dispclk + DSCDelay; *DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0) @@ -3532,10 +3506,10 @@ bool dml32_CalculatePrefetchSchedule( + ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? 
myPipe->HActive * 3 / 4 : 0); #ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles); + dml_print("DML::%s: DPPCycles: %d\n", __func__, st_vars->DPPCycles); dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock); dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk); - dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles); + dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, st_vars->DISPCLKCycles); dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->Dispclk); dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay); dml_print("DML::%s: ODMMode: %d\n", __func__, myPipe->ODMMode); @@ -3548,9 +3522,9 @@ bool dml32_CalculatePrefetchSchedule( else *DSTYAfterScaler = 0; - DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler; - *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1); - *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal)); + st_vars->DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler; + *DSTYAfterScaler = dml_floor(st_vars->DSTTotalPixelsAfterScaler / myPipe->HTotal, 1); + *DSTXAfterScaler = st_vars->DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal)); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler); dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler); @@ -3558,132 +3532,132 @@ bool dml32_CalculatePrefetchSchedule( MyError = false; - Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1); + st_vars->Tr0_trips = st_vars->trip_to_mem * (st_vars->HostVMDynamicLevelsTrips + 1); if (GPUVMEnable == true) { - Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime; - Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime; + st_vars->Tvm_trips_rounded = dml_ceil(4.0 * st_vars->Tvm_trips / st_vars->LineTime, 1.0) / 4.0 * 
st_vars->LineTime; + st_vars->Tr0_trips_rounded = dml_ceil(4.0 * st_vars->Tr0_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime; if (GPUVMPageTableLevels >= 3) { - *Tno_bw = UrgentExtraLatency + trip_to_mem * - (double) ((GPUVMPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1); + *Tno_bw = UrgentExtraLatency + st_vars->trip_to_mem * + (double) ((GPUVMPageTableLevels - 2) * (st_vars->HostVMDynamicLevelsTrips + 1) - 1); } else if (GPUVMPageTableLevels == 1 && myPipe->DCCEnable != true) { - Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) / - 4.0 * LineTime; // VBA_ERROR + st_vars->Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / st_vars->LineTime, 1.0) / + 4.0 * st_vars->LineTime; // VBA_ERROR *Tno_bw = UrgentExtraLatency; } else { *Tno_bw = 0; } } else if (myPipe->DCCEnable == true) { - Tvm_trips_rounded = LineTime / 4.0; - Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime; + st_vars->Tvm_trips_rounded = st_vars->LineTime / 4.0; + st_vars->Tr0_trips_rounded = dml_ceil(4.0 * st_vars->Tr0_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime; *Tno_bw = 0; } else { - Tvm_trips_rounded = LineTime / 4.0; - Tr0_trips_rounded = LineTime / 2.0; + st_vars->Tvm_trips_rounded = st_vars->LineTime / 4.0; + st_vars->Tr0_trips_rounded = st_vars->LineTime / 2.0; *Tno_bw = 0; } - Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0); - Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0); + st_vars->Tvm_trips_rounded = dml_max(st_vars->Tvm_trips_rounded, st_vars->LineTime / 4.0); + st_vars->Tr0_trips_rounded = dml_max(st_vars->Tr0_trips_rounded, st_vars->LineTime / 4.0); if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12) { - bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4; + st_vars->bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4; } else { - bytes_pp = myPipe->BytePerPixelY + 
myPipe->BytePerPixelC; + st_vars->bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC; } - prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + st_vars->prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC; - prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface, - prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime)); + st_vars->prefetch_bw_oto = dml_max(st_vars->bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface, + st_vars->prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * st_vars->LineTime)); - min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre; - min_Lsw = dml_max(min_Lsw, 1.0); - Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0; + st_vars->min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / st_vars->max_vratio_pre; + st_vars->min_Lsw = dml_max(st_vars->min_Lsw, 1.0); + st_vars->Lsw_oto = dml_ceil(4.0 * dml_max(st_vars->prefetch_sw_bytes / st_vars->prefetch_bw_oto / st_vars->LineTime, st_vars->min_Lsw), 1.0) / 4.0; if (GPUVMEnable == true) { - Tvm_oto = dml_max3( - Tvm_trips, - *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, - LineTime / 4.0); + st_vars->Tvm_oto = dml_max3( + st_vars->Tvm_trips, + *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / st_vars->prefetch_bw_oto, + st_vars->LineTime / 4.0); } else - Tvm_oto = LineTime / 4.0; + st_vars->Tvm_oto = st_vars->LineTime / 4.0; if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { - Tr0_oto = dml_max4( - Tr0_trips, - (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, - (LineTime - Tvm_oto)/2.0, - LineTime / 4.0); + st_vars->Tr0_oto = dml_max4( + st_vars->Tr0_trips, + (MetaRowByte + PixelPTEBytesPerRow * 
HostVMInefficiencyFactor) / st_vars->prefetch_bw_oto, + (st_vars->LineTime - st_vars->Tvm_oto)/2.0, + st_vars->LineTime / 4.0); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__, - (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto); - dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips); - dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, LineTime - Tvm_oto); - dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4); + (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / st_vars->prefetch_bw_oto); + dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, st_vars->Tr0_trips); + dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, st_vars->LineTime - st_vars->Tvm_oto); + dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, st_vars->LineTime / 4); #endif } else - Tr0_oto = (LineTime - Tvm_oto) / 2.0; + st_vars->Tr0_oto = (st_vars->LineTime - st_vars->Tvm_oto) / 2.0; - Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0; - Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0; - dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto; + st_vars->Tvm_oto_lines = dml_ceil(4.0 * st_vars->Tvm_oto / st_vars->LineTime, 1) / 4.0; + st_vars->Tr0_oto_lines = dml_ceil(4.0 * st_vars->Tr0_oto / st_vars->LineTime, 1) / 4.0; + st_vars->dst_y_prefetch_oto = st_vars->Tvm_oto_lines + 2 * st_vars->Tr0_oto_lines + st_vars->Lsw_oto; - dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - + st_vars->dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / st_vars->LineTime - (*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal); - dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw); + dml_print("DML::%s: min_Lsw = %f\n", __func__, st_vars->min_Lsw); dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw); 
dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency); - dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem); + dml_print("DML::%s: trip_to_mem = %f\n", __func__, st_vars->trip_to_mem); dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC); dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC); dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub); - dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes); - dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp); + dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, st_vars->prefetch_sw_bytes); + dml_print("DML::%s: bytes_pp = %f\n", __func__, st_vars->bytes_pp); dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); - dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips); - dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips); - dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto); - dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto); - dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto); - dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines); - dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines); - dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto); - dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto); - dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, 
dst_y_prefetch_equ); + dml_print("DML::%s: Tvm_trips = %f\n", __func__, st_vars->Tvm_trips); + dml_print("DML::%s: Tr0_trips = %f\n", __func__, st_vars->Tr0_trips); + dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, st_vars->prefetch_bw_oto); + dml_print("DML::%s: Tr0_oto = %f\n", __func__, st_vars->Tr0_oto); + dml_print("DML::%s: Tvm_oto = %f\n", __func__, st_vars->Tvm_oto); + dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, st_vars->Tvm_oto_lines); + dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, st_vars->Tr0_oto_lines); + dml_print("DML::%s: Lsw_oto = %f\n", __func__, st_vars->Lsw_oto); + dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, st_vars->dst_y_prefetch_oto); + dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, st_vars->dst_y_prefetch_equ); #endif - dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0; - Tpre_rounded = dst_y_prefetch_equ * LineTime; + st_vars->dst_y_prefetch_equ = dml_floor(4.0 * (st_vars->dst_y_prefetch_equ + 0.125), 1) / 4.0; + st_vars->Tpre_rounded = st_vars->dst_y_prefetch_equ * st_vars->LineTime; #ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ); - dml_print("DML::%s: LineTime: %f\n", __func__, LineTime); + dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, st_vars->dst_y_prefetch_equ); + dml_print("DML::%s: LineTime: %f\n", __func__, st_vars->LineTime); dml_print("DML::%s: VStartup: %d\n", __func__, VStartup); dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", - __func__, VStartup * LineTime); + __func__, VStartup * st_vars->LineTime); dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup); dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc); - dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf); - 
dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec); + dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, st_vars->Tdmbf); + dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, st_vars->Tdmec); dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm); dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl); dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n", __func__, *DSTYAfterScaler); #endif - dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, + st_vars->dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor); - if (prefetch_sw_bytes < dep_bytes) - prefetch_sw_bytes = 2 * dep_bytes; + if (st_vars->prefetch_sw_bytes < st_vars->dep_bytes) + st_vars->prefetch_sw_bytes = 2 * st_vars->dep_bytes; *PrefetchBandwidth = 0; *DestinationLinesToRequestVMInVBlank = 0; @@ -3691,61 +3665,61 @@ bool dml32_CalculatePrefetchSchedule( *VRatioPrefetchY = 0; *VRatioPrefetchC = 0; *RequiredPrefetchPixDataBWLuma = 0; - if (dst_y_prefetch_equ > 1) { + if (st_vars->dst_y_prefetch_equ > 1) { double PrefetchBandwidth1; double PrefetchBandwidth2; double PrefetchBandwidth3; double PrefetchBandwidth4; - if (Tpre_rounded - *Tno_bw > 0) { + if (st_vars->Tpre_rounded - *Tno_bw > 0) { PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor - + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw); - Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1; + + st_vars->prefetch_sw_bytes) / (st_vars->Tpre_rounded - *Tno_bw); + st_vars->Tsw_est1 = st_vars->prefetch_sw_bytes / PrefetchBandwidth1; } else PrefetchBandwidth1 = 0; - if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < 
min_Lsw) - && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) { + if (VStartup == MaxVStartup && (st_vars->Tsw_est1 / st_vars->LineTime < st_vars->min_Lsw) + && st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - *Tno_bw > 0) { PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) - / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw); + / (st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - *Tno_bw); } - if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0) - PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / - (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded); + if (st_vars->Tpre_rounded - *Tno_bw - 2 * st_vars->Tr0_trips_rounded > 0) + PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + st_vars->prefetch_sw_bytes) / + (st_vars->Tpre_rounded - *Tno_bw - 2 * st_vars->Tr0_trips_rounded); else PrefetchBandwidth2 = 0; - if (Tpre_rounded - Tvm_trips_rounded > 0) { + if (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded > 0) { PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor - + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded); - Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3; + + st_vars->prefetch_sw_bytes) / (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded); + st_vars->Tsw_est3 = st_vars->prefetch_sw_bytes / PrefetchBandwidth3; } else PrefetchBandwidth3 = 0; if (VStartup == MaxVStartup && - (Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 * - LineTime - Tvm_trips_rounded > 0) { + (st_vars->Tsw_est3 / st_vars->LineTime < st_vars->min_Lsw) && st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * + st_vars->LineTime - st_vars->Tvm_trips_rounded > 0) { PrefetchBandwidth3 = (2 * MetaRowByte + 2 * 
PixelPTEBytesPerRow * HostVMInefficiencyFactor) - / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded); + / (st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - st_vars->Tvm_trips_rounded); } - if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) { - PrefetchBandwidth4 = prefetch_sw_bytes / - (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded); + if (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded - 2 * st_vars->Tr0_trips_rounded > 0) { + PrefetchBandwidth4 = st_vars->prefetch_sw_bytes / + (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded - 2 * st_vars->Tr0_trips_rounded); } else { PrefetchBandwidth4 = 0; } #ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded); + dml_print("DML::%s: Tpre_rounded: %f\n", __func__, st_vars->Tpre_rounded); dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw); - dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded); - dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1); - dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3); + dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, st_vars->Tvm_trips_rounded); + dml_print("DML::%s: Tsw_est1: %f\n", __func__, st_vars->Tsw_est1); + dml_print("DML::%s: Tsw_est3: %f\n", __func__, st_vars->Tsw_est3); dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1); dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2); dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3); @@ -3758,9 +3732,9 @@ bool dml32_CalculatePrefetchSchedule( if (PrefetchBandwidth1 > 0) { if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 - >= Tvm_trips_rounded + >= st_vars->Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) - / PrefetchBandwidth1 >= Tr0_trips_rounded) { + / PrefetchBandwidth1 >= st_vars->Tr0_trips_rounded) { Case1OK = true; 
} else { Case1OK = false; @@ -3771,9 +3745,9 @@ bool dml32_CalculatePrefetchSchedule( if (PrefetchBandwidth2 > 0) { if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 - >= Tvm_trips_rounded + >= st_vars->Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) - / PrefetchBandwidth2 < Tr0_trips_rounded) { + / PrefetchBandwidth2 < st_vars->Tr0_trips_rounded) { Case2OK = true; } else { Case2OK = false; @@ -3784,9 +3758,9 @@ bool dml32_CalculatePrefetchSchedule( if (PrefetchBandwidth3 > 0) { if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < - Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * + st_vars->Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= - Tr0_trips_rounded) { + st_vars->Tr0_trips_rounded) { Case3OK = true; } else { Case3OK = false; @@ -3796,80 +3770,80 @@ bool dml32_CalculatePrefetchSchedule( } if (Case1OK) - prefetch_bw_equ = PrefetchBandwidth1; + st_vars->prefetch_bw_equ = PrefetchBandwidth1; else if (Case2OK) - prefetch_bw_equ = PrefetchBandwidth2; + st_vars->prefetch_bw_equ = PrefetchBandwidth2; else if (Case3OK) - prefetch_bw_equ = PrefetchBandwidth3; + st_vars->prefetch_bw_equ = PrefetchBandwidth3; else - prefetch_bw_equ = PrefetchBandwidth4; + st_vars->prefetch_bw_equ = PrefetchBandwidth4; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK); dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK); dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK); - dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ); + dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, st_vars->prefetch_bw_equ); #endif - if (prefetch_bw_equ > 0) { + if (st_vars->prefetch_bw_equ > 0) { if (GPUVMEnable == true) { - Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * - HostVMInefficiencyFactor / prefetch_bw_equ, - Tvm_trips, LineTime / 4); + st_vars->Tvm_equ = 
dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * + HostVMInefficiencyFactor / st_vars->prefetch_bw_equ, + st_vars->Tvm_trips, st_vars->LineTime / 4); } else { - Tvm_equ = LineTime / 4; + st_vars->Tvm_equ = st_vars->LineTime / 4; } if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { - Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow * - HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips, - (LineTime - Tvm_equ) / 2, LineTime / 4); + st_vars->Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow * + HostVMInefficiencyFactor) / st_vars->prefetch_bw_equ, st_vars->Tr0_trips, + (st_vars->LineTime - st_vars->Tvm_equ) / 2, st_vars->LineTime / 4); } else { - Tr0_equ = (LineTime - Tvm_equ) / 2; + st_vars->Tr0_equ = (st_vars->LineTime - st_vars->Tvm_equ) / 2; } } else { - Tvm_equ = 0; - Tr0_equ = 0; + st_vars->Tvm_equ = 0; + st_vars->Tr0_equ = 0; #ifdef __DML_VBA_DEBUG__ dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__); #endif } } - if (dst_y_prefetch_oto < dst_y_prefetch_equ) { - *DestinationLinesForPrefetch = dst_y_prefetch_oto; - TimeForFetchingMetaPTE = Tvm_oto; - TimeForFetchingRowInVBlank = Tr0_oto; - *PrefetchBandwidth = prefetch_bw_oto; + if (st_vars->dst_y_prefetch_oto < st_vars->dst_y_prefetch_equ) { + *DestinationLinesForPrefetch = st_vars->dst_y_prefetch_oto; + st_vars->TimeForFetchingMetaPTE = st_vars->Tvm_oto; + st_vars->TimeForFetchingRowInVBlank = st_vars->Tr0_oto; + *PrefetchBandwidth = st_vars->prefetch_bw_oto; } else { - *DestinationLinesForPrefetch = dst_y_prefetch_equ; - TimeForFetchingMetaPTE = Tvm_equ; - TimeForFetchingRowInVBlank = Tr0_equ; - *PrefetchBandwidth = prefetch_bw_equ; + *DestinationLinesForPrefetch = st_vars->dst_y_prefetch_equ; + st_vars->TimeForFetchingMetaPTE = st_vars->Tvm_equ; + st_vars->TimeForFetchingRowInVBlank = st_vars->Tr0_equ; + *PrefetchBandwidth = st_vars->prefetch_bw_equ; } - *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0; + 
*DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * st_vars->TimeForFetchingMetaPTE / st_vars->LineTime, 1.0) / 4.0; *DestinationLinesToRequestRowInVBlank = - dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0; + dml_ceil(4.0 * st_vars->TimeForFetchingRowInVBlank / st_vars->LineTime, 1.0) / 4.0; - LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - + st_vars->LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch); dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank); - dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank); - dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); + dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, st_vars->TimeForFetchingRowInVBlank); + dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime); dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank); dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); - dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData); + dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, st_vars->LinesToRequestPrefetchPixelData); #endif - if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) { - *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData; + if (st_vars->LinesToRequestPrefetchPixelData >= 1 && st_vars->prefetch_bw_equ > 0) { + *VRatioPrefetchY = (double) PrefetchSourceLinesY / st_vars->LinesToRequestPrefetchPixelData; *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: VRatioPrefetchY = %f\n", 
__func__, *VRatioPrefetchY); @@ -3877,12 +3851,12 @@ bool dml32_CalculatePrefetchSchedule( dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY); #endif if ((SwathHeightY > 4) && (VInitPreFillY > 3)) { - if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { + if (st_vars->LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { *VRatioPrefetchY = dml_max((double) PrefetchSourceLinesY / - LinesToRequestPrefetchPixelData, + st_vars->LinesToRequestPrefetchPixelData, (double) MaxNumSwathY * SwathHeightY / - (LinesToRequestPrefetchPixelData - + (st_vars->LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0)); *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); } else { @@ -3896,7 +3870,7 @@ bool dml32_CalculatePrefetchSchedule( #endif } - *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData; + *VRatioPrefetchC = (double) PrefetchSourceLinesC / st_vars->LinesToRequestPrefetchPixelData; *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); #ifdef __DML_VBA_DEBUG__ @@ -3905,11 +3879,11 @@ bool dml32_CalculatePrefetchSchedule( dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC); #endif if ((SwathHeightC > 4)) { - if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { + if (st_vars->LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { *VRatioPrefetchC = dml_max(*VRatioPrefetchC, (double) MaxNumSwathC * SwathHeightC / - (LinesToRequestPrefetchPixelData - + (st_vars->LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0)); *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); } else { @@ -3924,25 +3898,25 @@ bool dml32_CalculatePrefetchSchedule( } *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY - / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub - / LineTime; + / st_vars->LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub + / st_vars->LineTime; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: 
BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); - dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); + dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime); dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *RequiredPrefetchPixDataBWLuma); #endif *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / - LinesToRequestPrefetchPixelData + st_vars->LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC - * swath_width_chroma_ub / LineTime; + * swath_width_chroma_ub / st_vars->LineTime; } else { MyError = true; #ifdef __DML_VBA_DEBUG__ dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n", - __func__, LinesToRequestPrefetchPixelData); + __func__, st_vars->LinesToRequestPrefetchPixelData); #endif *VRatioPrefetchY = 0; *VRatioPrefetchC = 0; @@ -3951,15 +3925,15 @@ bool dml32_CalculatePrefetchSchedule( } #ifdef __DML_VBA_DEBUG__ dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n", - (double)LinesToRequestPrefetchPixelData * LineTime + - 2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE); - dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE); + (double)st_vars->LinesToRequestPrefetchPixelData * st_vars->LineTime + + 2.0*st_vars->TimeForFetchingRowInVBlank + st_vars->TimeForFetchingMetaPTE); + dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", st_vars->TimeForFetchingMetaPTE); dml_print("DML: To: %fus - time for propagation from scaler to optc\n", - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime); + (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * st_vars->LineTime); dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n"); - dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup 
* LineTime - - TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler + - ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup); + dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * st_vars->LineTime - + st_vars->TimeForFetchingMetaPTE - 2*st_vars->TimeForFetchingRowInVBlank - (*DSTYAfterScaler + + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * st_vars->LineTime - TWait - TCalc - *TSetup); dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow); #endif @@ -3967,7 +3941,7 @@ bool dml32_CalculatePrefetchSchedule( MyError = true; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n", - __func__, dst_y_prefetch_equ); + __func__, st_vars->dst_y_prefetch_equ); #endif } @@ -3983,10 +3957,10 @@ bool dml32_CalculatePrefetchSchedule( dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank); - dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); + dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime); #endif prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / - (*DestinationLinesToRequestVMInVBlank * LineTime); + (*DestinationLinesToRequestVMInVBlank * st_vars->LineTime); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); #endif @@ -4003,7 +3977,7 @@ bool dml32_CalculatePrefetchSchedule( prefetch_row_bw = 0; } else if (*DestinationLinesToRequestRowInVBlank > 0) { prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / - (*DestinationLinesToRequestRowInVBlank * LineTime); + (*DestinationLinesToRequestRowInVBlank * st_vars->LineTime); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); @@ -4026,12 
+4000,12 @@ bool dml32_CalculatePrefetchSchedule( if (MyError) { *PrefetchBandwidth = 0; - TimeForFetchingMetaPTE = 0; - TimeForFetchingRowInVBlank = 0; + st_vars->TimeForFetchingMetaPTE = 0; + st_vars->TimeForFetchingRowInVBlank = 0; *DestinationLinesToRequestVMInVBlank = 0; *DestinationLinesToRequestRowInVBlank = 0; *DestinationLinesForPrefetch = 0; - LinesToRequestPrefetchPixelData = 0; + st_vars->LinesToRequestPrefetchPixelData = 0; *VRatioPrefetchY = 0; *VRatioPrefetchC = 0; *RequiredPrefetchPixDataBWLuma = 0; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h index ecd4f1e7d1f7..37a314ce284b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h @@ -715,6 +715,7 @@ double dml32_CalculateExtraLatency( unsigned int HostVMMaxNonCachedPageTableLevels); bool dml32_CalculatePrefetchSchedule( + struct dml32_CalculatePrefetchSchedule *st_vars, double HostVMInefficiencyFactor, DmlPipe *myPipe, unsigned int DSCDelay, diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h index 42e4e4c5e656..8460aefe7b6d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h @@ -247,6 +247,43 @@ struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport { unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX]; }; +struct dml32_CalculatePrefetchSchedule { + unsigned int DPPCycles, DISPCLKCycles; + double DSTTotalPixelsAfterScaler; + double LineTime; + double dst_y_prefetch_equ; + double prefetch_bw_oto; + double Tvm_oto; + double Tr0_oto; + double Tvm_oto_lines; + double Tr0_oto_lines; + double dst_y_prefetch_oto; + double TimeForFetchingMetaPTE; + double TimeForFetchingRowInVBlank; + double LinesToRequestPrefetchPixelData; + unsigned int 
HostVMDynamicLevelsTrips; + double trip_to_mem; + double Tvm_trips; + double Tr0_trips; + double Tvm_trips_rounded; + double Tr0_trips_rounded; + double Lsw_oto; + double Tpre_rounded; + double prefetch_bw_equ; + double Tvm_equ; + double Tr0_equ; + double Tdmbf; + double Tdmec; + double Tdmsks; + double prefetch_sw_bytes; + double bytes_pp; + double dep_bytes; + unsigned int max_vratio_pre; + double min_Lsw; + double Tsw_est1; + double Tsw_est3; +}; + struct DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation { unsigned int dummy_integer_array[2][DC__NUM_DPP__MAX]; double dummy_single_array[2][DC__NUM_DPP__MAX]; @@ -321,6 +358,7 @@ struct dummy_vars { struct dml32_CalculateSwathAndDETConfiguration dml32_CalculateSwathAndDETConfiguration; struct dml32_CalculateVMRowAndSwath dml32_CalculateVMRowAndSwath; struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport; + struct dml32_CalculatePrefetchSchedule dml32_CalculatePrefetchSchedule; }; struct vba_vars_st { -- cgit v1.2.3 From 9dd4545f65679042273a93054ec3bb665ecf7366 Mon Sep 17 00:00:00 2001 From: Slark Xiao Date: Thu, 21 Jul 2022 14:09:46 +0800 Subject: drm/amd: Fix typo 'the the' in comment Replace 'the the' with 'the' in the comment. Signed-off-by: Slark Xiao Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 2 +- drivers/gpu/drm/amd/include/atombios.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index ff659d4f772b..8adeb7469f1e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -47,7 +47,7 @@ * for GPU/CPU synchronization. When the fence is written, * it is expected that all buffers associated with that fence * are no longer in use by the associated ring on the GPU and - * that the the relevant GPU caches have been flushed. 
+ * that the relevant GPU caches have been flushed. */ struct amdgpu_fence { diff --git a/drivers/gpu/drm/amd/include/atombios.h b/drivers/gpu/drm/amd/include/atombios.h index 1f9df4e7509b..15943bc21bc5 100644 --- a/drivers/gpu/drm/amd/include/atombios.h +++ b/drivers/gpu/drm/amd/include/atombios.h @@ -3255,8 +3255,8 @@ ucMaxNBVoltageHigh: Voltage regulator dependent PWM value. High 8 bits of t ucMinNBVoltageHigh: Voltage regulator dependent PWM value. High 8 bits of the value for the min voltage.Set this one to 0x00 if VC without PWM or no VC at all. -usInterNBVoltageLow: Voltage regulator dependent PWM value. The value makes the the voltage >=Min NB voltage but <=InterNBVoltageHigh. Set this to 0x0000 if VC without PWM or no VC at all. -usInterNBVoltageHigh: Voltage regulator dependent PWM value. The value makes the the voltage >=InterNBVoltageLow but <=Max NB voltage.Set this to 0x0000 if VC without PWM or no VC at all. +usInterNBVoltageLow: Voltage regulator dependent PWM value. The value makes the voltage >=Min NB voltage but <=InterNBVoltageHigh. Set this to 0x0000 if VC without PWM or no VC at all. +usInterNBVoltageHigh: Voltage regulator dependent PWM value. The value makes the voltage >=InterNBVoltageLow but <=Max NB voltage.Set this to 0x0000 if VC without PWM or no VC at all. */ -- cgit v1.2.3 From 3616d49da5dab635c4bb0eeb183b61df72cd0757 Mon Sep 17 00:00:00 2001 From: Aaron Liu Date: Tue, 19 Apr 2022 14:17:05 +0800 Subject: drm/amdgpu: enable swiotlb for gmc 10.0 (V2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enable swiotlb for gmc 10.0. 
v2: include drm_cache.h to use the function ‘drm_need_swiotlb’ Signed-off-by: Aaron Liu Reviewed-by: Alex Deucher Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 1772f006c61a..9ae8cdaa033e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -22,6 +22,9 @@ */ #include #include + +#include <drm/drm_cache.h> + #include "amdgpu.h" #include "amdgpu_atomfirmware.h" #include "gmc_v10_0.h" @@ -980,6 +983,8 @@ static int gmc_v10_0_sw_init(void *handle) return r; } + adev->need_swiotlb = drm_need_swiotlb(44); + r = gmc_v10_0_mc_init(adev); if (r) return r; -- cgit v1.2.3 From 4c5aa594928f97593502a66d5a9075f5f5dd064b Mon Sep 17 00:00:00 2001 From: Aaron Liu Date: Mon, 25 Jul 2022 08:45:56 +0800 Subject: drm/amdgpu: enable swiotlb for gmc 11.0 Enable swiotlb for gmc 11.0. Signed-off-by: Aaron Liu Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index 503e40a90319..1471bfb9ae38 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -22,6 +22,9 @@ */ #include #include + +#include <drm/drm_cache.h> + #include "amdgpu.h" #include "amdgpu_atomfirmware.h" #include "gmc_v11_0.h" @@ -775,6 +778,8 @@ static int gmc_v11_0_sw_init(void *handle) return r; } + adev->need_swiotlb = drm_need_swiotlb(44); + r = gmc_v11_0_mc_init(adev); if (r) return r; -- cgit v1.2.3 From 8de297dc046c180651c0500f8611663ae1c3828a Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Mon, 11 Jul 2022 10:03:03 -0400 Subject: drm/amd/display: Avoid MPC infinite loop [why] In some cases MPC tree bottom pipe ends up pointing to itself.
This causes iterating from top to bottom to hang the system in an infinite loop. [how] When looping to the next MPC bottom pipe, check that the pointer is not the same as the current one to avoid an infinite loop. Reviewed-by: Josip Pavic Reviewed-by: Jun Lei Acked-by: Alex Hung Signed-off-by: Aric Cyr Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c index 11019c2c62cc..769974375b4b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c @@ -49,6 +49,11 @@ void mpc1_set_bg_color(struct mpc *mpc, /* find bottommost mpcc. */ while (bottommost_mpcc->mpcc_bot) { + /* avoid circular linked list */ + ASSERT(bottommost_mpcc != bottommost_mpcc->mpcc_bot); + if (bottommost_mpcc == bottommost_mpcc->mpcc_bot) + break; + bottommost_mpcc = bottommost_mpcc->mpcc_bot; } -- cgit v1.2.3 From 44584b417a698bfaac0c2577e7cc4015ea7359ce Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 6 Jul 2022 16:26:40 -0400 Subject: drm/amd/display: Add enable/disable FIFO callbacks to stream setup [Why] We don't write out attributes after disabling and re-enabling the link on some monitors, causing some, but not all, HDMI displays to fail to light up on DCN314. [How] Firmware used to do this after DIG link setup. Since firmware is no longer doing this to support USB4 and dynamic link remapping we'll need to add this to driver in the equivalent paths. New optional callbacks were created in the stream encoder interface and implementations were added for DCN314.
Reviewed-by: Michael Strauss Acked-by: Alex Hung Signed-off-by: Nicholas Kazlauskas Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../display/dc/dcn314/dcn314_dio_stream_encoder.c | 38 +++++++++++++++------- .../gpu/drm/amd/display/dc/inc/hw/stream_encoder.h | 2 ++ .../gpu/drm/amd/display/dc/link/link_hwss_dio.c | 7 ++++ 3 files changed, 35 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c index 2dbfa1c234dd..b384f30395d3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c @@ -50,6 +50,26 @@ enc1->base.ctx +static void enc314_enable_fifo(struct stream_encoder *enc) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + + /* TODO: Confirm if we need to wait for DIG_SYMCLK_FE_ON */ + REG_WAIT(DIG_FE_CNTL, DIG_SYMCLK_FE_ON, 1, 10, 5000); + REG_UPDATE_2(DIG_FIFO_CTRL0, DIG_FIFO_RESET, 1, DIG_FIFO_READ_START_LEVEL, 0x7); + REG_WAIT(DIG_FIFO_CTRL0, DIG_FIFO_RESET_DONE, 1, 10, 5000); + REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_RESET, 0); + REG_WAIT(DIG_FIFO_CTRL0, DIG_FIFO_RESET_DONE, 0, 10, 5000); + REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, 1); +} + +static void enc314_disable_fifo(struct stream_encoder *enc) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + + REG_UPDATE_2(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, 0, + DIG_FIFO_READ_START_LEVEL, 0); +} static void enc314_dp_set_odm_combine( struct stream_encoder *enc, @@ -92,7 +112,7 @@ void enc314_stream_encoder_dvi_set_stream_attribute( //DIG_SOURCE_SELECT is already set in dig_connect_to_otg - /* DIG_START is removed from the register spec */ + enc314_enable_fifo(enc); } ASSERT(crtc_timing->pixel_encoding == PIXEL_ENCODING_RGB); @@ -132,7 +152,7 @@ static void enc314_stream_encoder_hdmi_set_stream_attribute( //DIG_SOURCE_SELECT is already set in 
dig_connect_to_otg - /* DIG_START is removed from the register spec */ + enc314_enable_fifo(enc); } /* Configure pixel encoding */ @@ -302,16 +322,8 @@ static void enc314_stream_encoder_dp_unblank( REG_UPDATE(DP_STEER_FIFO, DP_STEER_FIFO_RESET, 0); - /* - * DIG Resync FIFO now needs to be explicitly enabled. - * TODO: Confirm if we need to wait for DIG_SYMCLK_FE_ON - */ - REG_WAIT(DIG_FE_CNTL, DIG_SYMCLK_FE_ON, 1, 10, 5000); - REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_RESET, 1); - REG_WAIT(DIG_FIFO_CTRL0, DIG_FIFO_RESET_DONE, 1, 10, 5000); - REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_RESET, 0); - REG_WAIT(DIG_FIFO_CTRL0, DIG_FIFO_RESET_DONE, 0, 10, 5000); - REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, 1); + /* DIG Resync FIFO now needs to be explicitly enabled. */ + enc314_enable_fifo(enc); /* wait 100us for DIG/DP logic to prime * (i.e. a few video lines) @@ -420,6 +432,8 @@ static const struct stream_encoder_funcs dcn314_str_enc_funcs = { .set_dynamic_metadata = enc2_set_dynamic_metadata, .hdmi_reset_stream_attribute = enc1_reset_hdmi_stream_attribute, + .enable_fifo = enc314_enable_fifo, + .disable_fifo = enc314_disable_fifo, .set_input_mode = enc314_set_dig_input_mode, }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h index 456dbe9f2264..42afa1952890 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h @@ -252,6 +252,8 @@ struct stream_encoder_funcs { void (*set_input_mode)( struct stream_encoder *enc, unsigned int pix_per_container); + void (*enable_fifo)(struct stream_encoder *enc); + void (*disable_fifo)(struct stream_encoder *enc); }; struct hpo_dp_stream_encoder_state { diff --git a/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.c b/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.c index 776e822abcbb..5e92019539c8 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.c +++ 
b/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.c @@ -40,17 +40,24 @@ void set_dio_throttled_vcp_size(struct pipe_ctx *pipe_ctx, void setup_dio_stream_encoder(struct pipe_ctx *pipe_ctx) { struct link_encoder *link_enc = link_enc_cfg_get_link_enc(pipe_ctx->stream->link); + struct stream_encoder *stream_enc = pipe_ctx->stream_res.stream_enc; link_enc->funcs->connect_dig_be_to_fe(link_enc, pipe_ctx->stream_res.stream_enc->id, true); if (dc_is_dp_signal(pipe_ctx->stream->signal)) dp_source_sequence_trace(pipe_ctx->stream->link, DPCD_SOURCE_SEQ_AFTER_CONNECT_DIG_FE_BE); + if (stream_enc->funcs->enable_fifo) + stream_enc->funcs->enable_fifo(stream_enc); } void reset_dio_stream_encoder(struct pipe_ctx *pipe_ctx) { struct link_encoder *link_enc = link_enc_cfg_get_link_enc(pipe_ctx->stream->link); + struct stream_encoder *stream_enc = pipe_ctx->stream_res.stream_enc; + + if (stream_enc && stream_enc->funcs->disable_fifo) + stream_enc->funcs->disable_fifo(stream_enc); link_enc->funcs->connect_dig_be_to_fe( link_enc, -- cgit v1.2.3 From 2ce0b2186c057a54a4d980b296bd1659d0091716 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Thu, 7 Jul 2022 10:19:53 -0400 Subject: drm/amd/display: Fix OPTC function pointers for DCN314 [Why] Access violation occurs when swapping between HDMI and FRL monitors because we're missing the immediate_disable_crtc callback and it's required for the DCN314 clk manager. 
[How] Update the table to match the DCN31 optc functions for ones that should be the same: - immediate_disable_crtc - configure_crc Reviewed-by: Michael Strauss Acked-by: Alex Hung Signed-off-by: Nicholas Kazlauskas Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c | 2 +- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h | 2 ++ drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c | 3 ++- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c index c4304f25ce95..2f7404a97479 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c @@ -141,7 +141,7 @@ static bool optc31_disable_crtc(struct timing_generator *optc) return true; } -static bool optc31_immediate_disable_crtc(struct timing_generator *optc) +bool optc31_immediate_disable_crtc(struct timing_generator *optc) { struct optc *optc1 = DCN10TG_FROM_TG(optc); diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h index 3706e6f7880e..30b81a448ce2 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h @@ -258,6 +258,8 @@ void dcn31_timing_generator_init(struct optc *optc1); +bool optc31_immediate_disable_crtc(struct timing_generator *optc); + void optc31_set_drr(struct timing_generator *optc, const struct drr_params *params); void optc3_init_odm(struct timing_generator *optc); diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c index 436c3545a983..3011f9e2f35c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c @@ -170,6 +170,7 @@ static struct timing_generator_funcs dcn314_tg_funcs = { .program_global_sync = optc1_program_global_sync, 
.enable_crtc = optc314_enable_crtc, .disable_crtc = optc314_disable_crtc, + .immediate_disable_crtc = optc31_immediate_disable_crtc, .phantom_crtc_post_enable = optc314_phantom_crtc_post_enable, /* used by enable_timing_synchronization. Not need for FPGA */ .is_counter_moving = optc1_is_counter_moving, @@ -204,7 +205,7 @@ static struct timing_generator_funcs dcn314_tg_funcs = { .clear_optc_underflow = optc1_clear_optc_underflow, .setup_global_swap_lock = NULL, .get_crc = optc1_get_crc, - .configure_crc = optc1_configure_crc, + .configure_crc = optc2_configure_crc, .set_dsc_config = optc3_set_dsc_config, .get_dsc_status = optc2_get_dsc_status, .set_dwb_source = NULL, -- cgit v1.2.3 From 319568d75f5f91cd4f362b26e65af2a4437c64bf Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Tue, 12 Jul 2022 15:49:19 -0400 Subject: drm/amd/display: Updates SubVP and SubVP DRR cases [Description] - For any DRR cases in SubVP, don't lock for VSYNC flips - For DCN32/321 use FW to do DRR manual trigger programming - Add bit in SubVP cmd to indicate if the SubVP pipe is DRR Reviewed-by: Jun Lei Acked-by: Alex Hung Signed-off-by: Alvin Lee Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 17 ++++++ drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h | 1 + drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c | 7 +-- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c | 63 +++++++++++++++++++++- 4 files changed, 82 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index 39b426d04037..2d61c2a91cee 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -270,6 +270,23 @@ void dc_dmub_srv_drr_update_cmd(struct dc *dc, uint32_t tg_inst, uint32_t vtotal dc_dmub_srv_wait_idle(dc->ctx->dmub_srv); } +void dc_dmub_srv_set_drr_manual_trigger_cmd(struct dc *dc, uint32_t tg_inst) +{ + union dmub_rb_cmd cmd = { 0 }; + + 
cmd.drr_update.header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH; + // TODO: Uncomment once FW headers are promoted + //cmd.drr_update.header.sub_type = DMUB_CMD__FAMS_SET_MANUAL_TRIGGER; + cmd.drr_update.dmub_optc_state_req.tg_inst = tg_inst; + + cmd.drr_update.header.payload_bytes = sizeof(cmd.drr_update) - sizeof(cmd.drr_update.header); + + // Send the command to the DMCUB. + dc_dmub_srv_cmd_queue(dc->ctx->dmub_srv, &cmd); + dc_dmub_srv_cmd_execute(dc->ctx->dmub_srv); + dc_dmub_srv_wait_idle(dc->ctx->dmub_srv); +} + static uint8_t dc_dmub_srv_get_pipes_for_stream(struct dc *dc, struct dc_stream_state *stream) { uint8_t pipes = 0; diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h index 1d124a2695d5..159782cd6659 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h @@ -74,6 +74,7 @@ void dc_dmub_trace_event_control(struct dc *dc, bool enable); void dc_dmub_srv_drr_update_cmd(struct dc *dc, uint32_t tg_inst, uint32_t vtotal_min, uint32_t vtotal_max); +void dc_dmub_srv_set_drr_manual_trigger_cmd(struct dc *dc, uint32_t tg_inst); bool dc_dmub_srv_p_state_delegate(struct dc *dc, bool enable_pstate, struct dc_state *context); void dc_dmub_srv_query_caps_cmd(struct dmub_srv *dmub); diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c index b6bada383958..bf9ac9dfc7dd 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c @@ -424,7 +424,6 @@ void dcn32_subvp_pipe_control_lock(struct dc *dc, unsigned int i = 0; bool subvp_immediate_flip = false; bool subvp_in_use = false; - bool drr_pipe = false; struct pipe_ctx *pipe; for (i = 0; i < dc->res_pool->pipe_count; i++) { @@ -440,12 +439,10 @@ void dcn32_subvp_pipe_control_lock(struct dc *dc, if (top_pipe_to_program->stream->mall_stream_config.type == SUBVP_MAIN && 
top_pipe_to_program->plane_state->flip_immediate) subvp_immediate_flip = true; - else if (top_pipe_to_program->stream->mall_stream_config.type == SUBVP_NONE && - top_pipe_to_program->stream->ignore_msa_timing_param) - drr_pipe = true; } - if ((subvp_in_use && (should_lock_all_pipes || subvp_immediate_flip || drr_pipe)) || (!subvp_in_use && subvp_prev_use)) { + // Don't need to lock for DRR VSYNC flips -- FW will wait for DRR pending update cleared. + if ((subvp_in_use && (should_lock_all_pipes || subvp_immediate_flip)) || (!subvp_in_use && subvp_prev_use)) { union dmub_inbox0_cmd_lock_hw hw_lock_cmd = { 0 }; if (!lock) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c index 00ff21458a53..992e56c6907e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c @@ -26,9 +26,11 @@ #include "dcn32_optc.h" #include "dcn30/dcn30_optc.h" +#include "dcn31/dcn31_optc.h" #include "reg_helper.h" #include "dc.h" #include "dcn_calc_math.h" +#include "dc_dmub_srv.h" #define REG(reg)\ optc1->tg_regs->reg @@ -188,6 +190,65 @@ static void optc32_set_odm_bypass(struct timing_generator *optc, optc1->opp_count = 1; } +void optc32_setup_manual_trigger(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + struct dc *dc = optc->ctx->dc; + + if (dc->caps.dmub_caps.mclk_sw && !dc->debug.disable_fams) + dc_dmub_srv_set_drr_manual_trigger_cmd(dc, optc->inst); + else { + /* + * MIN_MASK_EN is gone and MASK is now always enabled. + * + * To get it to work with manual trigger we need to make sure + * we program the correct bit.
+ */ + REG_UPDATE_4(OTG_V_TOTAL_CONTROL, + OTG_V_TOTAL_MIN_SEL, 1, + OTG_V_TOTAL_MAX_SEL, 1, + OTG_FORCE_LOCK_ON_EVENT, 0, + OTG_SET_V_TOTAL_MIN_MASK, (1 << 1)); /* TRIGA */ + + // Setup manual flow control for EOF via TRIG_A + optc->funcs->setup_manual_trigger(optc); + } +} + +void optc32_set_drr( + struct timing_generator *optc, + const struct drr_params *params) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + if (params != NULL && + params->vertical_total_max > 0 && + params->vertical_total_min > 0) { + + if (params->vertical_total_mid != 0) { + + REG_SET(OTG_V_TOTAL_MID, 0, + OTG_V_TOTAL_MID, params->vertical_total_mid - 1); + + REG_UPDATE_2(OTG_V_TOTAL_CONTROL, + OTG_VTOTAL_MID_REPLACING_MAX_EN, 1, + OTG_VTOTAL_MID_FRAME_NUM, + (uint8_t)params->vertical_total_mid_frame_num); + + } + + optc->funcs->set_vtotal_min_max(optc, params->vertical_total_min - 1, params->vertical_total_max - 1); + optc32_setup_manual_trigger(optc); + } else { + REG_UPDATE_4(OTG_V_TOTAL_CONTROL, + OTG_SET_V_TOTAL_MIN_MASK, 0, + OTG_V_TOTAL_MIN_SEL, 0, + OTG_V_TOTAL_MAX_SEL, 0, + OTG_FORCE_LOCK_ON_EVENT, 0); + + optc->funcs->set_vtotal_min_max(optc, 0, 0); + } +} static struct timing_generator_funcs dcn32_tg_funcs = { .validate_timing = optc1_validate_timing, @@ -221,7 +282,7 @@ static struct timing_generator_funcs dcn32_tg_funcs = { .lock_doublebuffer_disable = optc3_lock_doublebuffer_disable, .enable_optc_clock = optc1_enable_optc_clock, .set_vrr_m_const = optc3_set_vrr_m_const, - .set_drr = optc1_set_drr, + .set_drr = optc31_set_drr, // TODO: Update to optc32_set_drr once FW headers are promoted .get_last_used_drr_vtotal = optc2_get_last_used_drr_vtotal, .set_vtotal_min_max = optc3_set_vtotal_min_max, .set_static_screen_control = optc1_set_static_screen_control, -- cgit v1.2.3 From a7cefb0b40dcfdafedc54a3ba659327d0336956d Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Tue, 12 Jul 2022 14:32:45 -0400 Subject: drm/amd/display: Guard against zero memory channels [Why] If 
BIOS doesn't specify number of memory channels then bandwidth validation will fail due to insufficient BW in DML. [How] If BIOS is setting zero channels then use the default in the table. If no entry is in the table and no BIOS value is specified then throw an ASSERT for future developers to look into. Reviewed-by: Michael Strauss Acked-by: Alex Hung Signed-off-by: Nicholas Kazlauskas Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c index 450ebd838505..56ada096c89d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c @@ -1916,8 +1916,11 @@ static void dcn314_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *b dcn3_14_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator; dcn3_14_ip.max_num_dpp = dc->res_pool->pipe_count; - dcn3_14_soc.num_chans = bw_params->num_channels; + if (bw_params->num_channels > 0) + dcn3_14_soc.num_chans = bw_params->num_channels; + + ASSERT(dcn3_14_soc.num_chans); ASSERT(clk_table->num_entries); /* Prepass to find max clocks independent of voltage level. */ -- cgit v1.2.3 From eb54e014367e1700fa5b94e4c788520081a70661 Mon Sep 17 00:00:00 2001 From: Samson Tam Date: Tue, 12 Jul 2022 05:54:38 -0400 Subject: drm/amd/display: Fix two MPO videos in single display ODM combine mode [Why] In single display ODM combine mode, two MPO videos ( three planes ) are not working [How] When we detect three planes, don't set odm combine 2to1 policy for the MPO planes. Otherwise, we run out of pipes available Add support for two MPO videos in dc_add_plane_to_context(). Don't allow both videos to be on the same side of the display. Add extra check when fetching free pipe for two MPO videos. 
Reviewed-by: Alvin Lee Acked-by: Alex Hung Signed-off-by: Samson Tam Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 136 ++++++++++++++++--- .../gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 149 +++++++++++++++++++-- .../gpu/drm/amd/display/dc/dcn32/dcn32_resource.h | 6 + 3 files changed, 260 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 752ba4ab2b1e..ffc0f1c0ea93 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1463,6 +1463,7 @@ bool dc_add_plane_to_context( struct dc_stream_status *stream_status = NULL; struct pipe_ctx *prev_right_head = NULL; struct pipe_ctx *free_right_pipe = NULL; + struct pipe_ctx *prev_left_head = NULL; DC_LOGGER_INIT(stream->ctx->logger); for (i = 0; i < context->stream_count; i++) @@ -1514,8 +1515,16 @@ bool dc_add_plane_to_context( /* ODM + window MPO, where MPO window is on right half only */ if (free_pipe->plane_state && - (free_pipe->plane_state->clip_rect.x >= free_pipe->stream->src.x + free_pipe->stream->src.width/2) && - tail_pipe->next_odm_pipe) { + (free_pipe->plane_state->clip_rect.x >= free_pipe->stream->src.x + free_pipe->stream->src.width/2) && + tail_pipe->next_odm_pipe) { + + /* For ODM + window MPO, in 3 plane case, if we already have a MPO window on + * the right side, then we will invalidate a 2nd one on the right side + */ + if (head_pipe->next_odm_pipe && tail_pipe->next_odm_pipe->bottom_pipe) { + dc_plane_state_release(plane_state); + return false; + } DC_LOG_SCALER("%s - ODM + window MPO(right). 
free_pipe:%d tail_pipe->next_odm_pipe:%d\n", __func__, @@ -1530,20 +1539,42 @@ bool dc_add_plane_to_context( * - If not, continue to use free_pipe * - If the right side already has a pipe, use that pipe instead if its available */ + + /* + * We also want to avoid the case where with three plane ( 2 MPO videos ), we have + * both videos on the left side so one of the videos is invalidated. Then we + * move the invalidated video back to the right side. If the order of the plane + * states is such that the right MPO plane is processed first, the free pipe + * selected by the head will be the left MPO pipe. But since there was no right + * MPO pipe, it will assign the free pipe to the right MPO pipe instead and + * a pipe reallocation will occur. + * Check the old context to see if the left side already has a pipe allocated + * - If not, continue to use free_pipe + * - If the left side is already using this pipe, then pick another pipe for right + */ + prev_right_head = &dc->current_state->res_ctx.pipe_ctx[tail_pipe->next_odm_pipe->pipe_idx]; - if ((prev_right_head->bottom_pipe) && (free_pipe->pipe_idx != prev_right_head->bottom_pipe->pipe_idx)) { + if ((prev_right_head->bottom_pipe) && + (free_pipe->pipe_idx != prev_right_head->bottom_pipe->pipe_idx)) { free_right_pipe = acquire_free_pipe_for_head(context, pool, tail_pipe->next_odm_pipe); - if (free_right_pipe) { - free_pipe->stream = NULL; - memset(&free_pipe->stream_res, 0, sizeof(struct stream_resource)); - memset(&free_pipe->plane_res, 0, sizeof(struct plane_resource)); - free_pipe->plane_state = NULL; - free_pipe->pipe_idx = 0; - free_right_pipe->plane_state = plane_state; - free_pipe = free_right_pipe; + } else { + prev_left_head = &dc->current_state->res_ctx.pipe_ctx[head_pipe->pipe_idx]; + if ((prev_left_head->bottom_pipe) && + (free_pipe->pipe_idx == prev_left_head->bottom_pipe->pipe_idx)) { + free_right_pipe = acquire_free_pipe_for_head(context, pool, head_pipe); } } + if (free_right_pipe) { + 
free_pipe->stream = NULL; + memset(&free_pipe->stream_res, 0, sizeof(struct stream_resource)); + memset(&free_pipe->plane_res, 0, sizeof(struct plane_resource)); + free_pipe->plane_state = NULL; + free_pipe->pipe_idx = 0; + free_right_pipe->plane_state = plane_state; + free_pipe = free_right_pipe; + } + free_pipe->stream_res.tg = tail_pipe->next_odm_pipe->stream_res.tg; free_pipe->stream_res.abm = tail_pipe->next_odm_pipe->stream_res.abm; free_pipe->stream_res.opp = tail_pipe->next_odm_pipe->stream_res.opp; @@ -1553,7 +1584,63 @@ bool dc_add_plane_to_context( free_pipe->top_pipe = tail_pipe->next_odm_pipe; tail_pipe->next_odm_pipe->bottom_pipe = free_pipe; + } else if (free_pipe->plane_state && + (free_pipe->plane_state->clip_rect.x >= free_pipe->stream->src.x + free_pipe->stream->src.width/2) + && head_pipe->next_odm_pipe) { + + /* For ODM + window MPO, support 3 plane ( 2 MPO ) case. + * Here we have a desktop ODM + left window MPO and a new MPO window appears + * on the right side only. It fails the first case, because tail_pipe is the + * left window MPO, so it has no next_odm_pipe. So in this scenario, we check + * for head_pipe->next_odm_pipe instead + */ + DC_LOG_SCALER("%s - ODM + win MPO (left) + win MPO (right). free_pipe:%d head_pipe->next_odm:%d\n", + __func__, + free_pipe->pipe_idx, + head_pipe->next_odm_pipe ? head_pipe->next_odm_pipe->pipe_idx : -1); + + /* + * We want to avoid the case where the right side already has a pipe assigned to + * it and is different from free_pipe ( which would trigger a pipe
+ * Check the old context to see if the right side already has a pipe allocated + * - If not, continue to use free_pipe + * - If the right side already has a pipe, use that pipe instead if its available + */ + prev_right_head = &dc->current_state->res_ctx.pipe_ctx[head_pipe->next_odm_pipe->pipe_idx]; + if ((prev_right_head->bottom_pipe) && + (free_pipe->pipe_idx != prev_right_head->bottom_pipe->pipe_idx)) { + free_right_pipe = acquire_free_pipe_for_head(context, pool, head_pipe->next_odm_pipe); + if (free_right_pipe) { + free_pipe->stream = NULL; + memset(&free_pipe->stream_res, 0, sizeof(struct stream_resource)); + memset(&free_pipe->plane_res, 0, sizeof(struct plane_resource)); + free_pipe->plane_state = NULL; + free_pipe->pipe_idx = 0; + free_right_pipe->plane_state = plane_state; + free_pipe = free_right_pipe; + } + } + + free_pipe->stream_res.tg = head_pipe->next_odm_pipe->stream_res.tg; + free_pipe->stream_res.abm = head_pipe->next_odm_pipe->stream_res.abm; + free_pipe->stream_res.opp = head_pipe->next_odm_pipe->stream_res.opp; + free_pipe->stream_res.stream_enc = head_pipe->next_odm_pipe->stream_res.stream_enc; + free_pipe->stream_res.audio = head_pipe->next_odm_pipe->stream_res.audio; + free_pipe->clock_source = head_pipe->next_odm_pipe->clock_source; + + free_pipe->top_pipe = head_pipe->next_odm_pipe; + head_pipe->next_odm_pipe->bottom_pipe = free_pipe; } else { + + /* For ODM + window MPO, in 3 plane case, if we already have a MPO window on + * the left side, then we will invalidate a 2nd one on the left side + */ + if (head_pipe->next_odm_pipe && tail_pipe->top_pipe) { + dc_plane_state_release(plane_state); + return false; + } + free_pipe->stream_res.tg = tail_pipe->stream_res.tg; free_pipe->stream_res.abm = tail_pipe->stream_res.abm; free_pipe->stream_res.opp = tail_pipe->stream_res.opp; @@ -1564,21 +1651,28 @@ bool dc_add_plane_to_context( free_pipe->top_pipe = tail_pipe; tail_pipe->bottom_pipe = free_pipe; - if (!free_pipe->next_odm_pipe && 
tail_pipe->next_odm_pipe && tail_pipe->next_odm_pipe->bottom_pipe) { - free_pipe->next_odm_pipe = tail_pipe->next_odm_pipe->bottom_pipe; - tail_pipe->next_odm_pipe->bottom_pipe->prev_odm_pipe = free_pipe; - } - if (!free_pipe->prev_odm_pipe && tail_pipe->prev_odm_pipe && tail_pipe->prev_odm_pipe->bottom_pipe) { - free_pipe->prev_odm_pipe = tail_pipe->prev_odm_pipe->bottom_pipe; - tail_pipe->prev_odm_pipe->bottom_pipe->next_odm_pipe = free_pipe; + /* Connect MPO pipes together if MPO window is in the centre */ + if (!(free_pipe->plane_state && + (free_pipe->plane_state->clip_rect.x + free_pipe->plane_state->clip_rect.width <= + free_pipe->stream->src.x + free_pipe->stream->src.width/2))) { + if (!free_pipe->next_odm_pipe && + tail_pipe->next_odm_pipe && tail_pipe->next_odm_pipe->bottom_pipe) { + free_pipe->next_odm_pipe = tail_pipe->next_odm_pipe->bottom_pipe; + tail_pipe->next_odm_pipe->bottom_pipe->prev_odm_pipe = free_pipe; + } + if (!free_pipe->prev_odm_pipe && + tail_pipe->prev_odm_pipe && tail_pipe->prev_odm_pipe->bottom_pipe) { + free_pipe->prev_odm_pipe = tail_pipe->prev_odm_pipe->bottom_pipe; + tail_pipe->prev_odm_pipe->bottom_pipe->next_odm_pipe = free_pipe; + } } } } /* ODM + window MPO, where MPO window is on left half only */ if (free_pipe->plane_state && - (free_pipe->plane_state->clip_rect.x + free_pipe->plane_state->clip_rect.width <= - free_pipe->stream->src.x + free_pipe->stream->src.width/2)) { + (free_pipe->plane_state->clip_rect.x + free_pipe->plane_state->clip_rect.width <= + free_pipe->stream->src.x + free_pipe->stream->src.width/2)) { DC_LOG_SCALER("%s - ODM + window MPO(left). 
free_pipe:%d\n", __func__, free_pipe->pipe_idx); @@ -1586,7 +1680,7 @@ bool dc_add_plane_to_context( } /* ODM + window MPO, where MPO window is on right half only */ if (free_pipe->plane_state && - (free_pipe->plane_state->clip_rect.x >= free_pipe->stream->src.x + free_pipe->stream->src.width/2)) { + (free_pipe->plane_state->clip_rect.x >= free_pipe->stream->src.x + free_pipe->stream->src.width/2)) { DC_LOG_SCALER("%s - ODM + window MPO(right). free_pipe:%d\n", __func__, free_pipe->pipe_idx); diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index e551d2936d03..314dec5712b5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -1820,11 +1820,12 @@ int dcn32_populate_dml_pipes_from_context( struct resource_context *res_ctx = &context->res_ctx; struct pipe_ctx *pipe; bool subvp_in_use = false, is_pipe_split_expected[MAX_PIPES]; + int plane_count = 0; + struct dc_crtc_timing *timing; dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { - struct dc_crtc_timing *timing; if (!res_ctx->pipe_ctx[i].stream) continue; @@ -1876,11 +1877,12 @@ int dcn32_populate_dml_pipes_from_context( } } - pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_dal; - if (context->stream_count == 1) { - if (dc->debug.enable_single_display_2to1_odm_policy) - pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_2to1; - } + /* Calculate the number of planes we have so we can determine + * whether to apply ODM 2to1 policy or not + */ + if (pipe->stream && !pipe->prev_odm_pipe && + (!pipe->top_pipe || pipe->top_pipe->plane_state != pipe->plane_state)) + ++plane_count; DC_FP_START(); is_pipe_split_expected[i] = dcn32_predict_pipe_split(context, pipes[i].pipe, i); @@ -1889,6 +1891,28 @@ int dcn32_populate_dml_pipes_from_context( pipe_cnt++; 
} + /* Determine whether we will apply ODM 2to1 policy + * Applies to single display and where the number of planes is less than 3 + * For 3 plane case ( 2 MPO planes ), we will not set the policy for the MPO pipes + */ + for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { + if (!res_ctx->pipe_ctx[i].stream) + continue; + pipe = &res_ctx->pipe_ctx[i]; + timing = &pipe->stream->timing; + + pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_dal; + res_ctx->pipe_ctx[i].stream->odm_2to1_policy_applied = false; + if (context->stream_count == 1 && timing->dsc_cfg.num_slices_h != 1) { + if (dc->debug.enable_single_display_2to1_odm_policy) { + if (!((plane_count > 2) && pipe->top_pipe)) + pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_2to1; + } + res_ctx->pipe_ctx[i].stream->odm_2to1_policy_applied = true; + } + pipe_cnt++; + } + /* For DET allocation, we don't want to use DML policy (not optimal for utilizing all * the DET available for each pipe). Use the DET override input to maintain our driver * policy. 
@@ -1947,7 +1971,7 @@ static struct resource_funcs dcn32_res_pool_funcs = { .validate_bandwidth = dcn32_validate_bandwidth, .calculate_wm_and_dlg = dcn32_calculate_wm_and_dlg, .populate_dml_pipes = dcn32_populate_dml_pipes_from_context, - .acquire_idle_pipe_for_layer = dcn20_acquire_idle_pipe_for_layer, + .acquire_idle_pipe_for_head_pipe_in_layer = dcn32_acquire_idle_pipe_for_head_pipe_in_layer, .add_stream_to_ctx = dcn30_add_stream_to_ctx, .add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource, .remove_stream_from_ctx = dcn20_remove_stream_from_ctx, @@ -1976,7 +2000,7 @@ static bool dcn32_resource_construct( uint32_t pipe_fuses = 0; uint32_t num_pipes = 4; - DC_FP_START(); + DC_FP_START(); ctx->dc_bios->regs = &bios_regs; @@ -2316,13 +2340,13 @@ static bool dcn32_resource_construct( pool->base.oem_device = NULL; } - DC_FP_END(); + DC_FP_END(); return true; create_fail: - DC_FP_END(); + DC_FP_END(); dcn32_resource_destruct(pool); @@ -2346,3 +2370,108 @@ struct resource_pool *dcn32_create_resource_pool( kfree(pool); return NULL; } + +static struct pipe_ctx *find_idle_secondary_pipe_check_mpo( + struct resource_context *res_ctx, + const struct resource_pool *pool, + const struct pipe_ctx *primary_pipe) +{ + int i; + struct pipe_ctx *secondary_pipe = NULL; + struct pipe_ctx *next_odm_mpo_pipe = NULL; + int primary_index, preferred_pipe_idx; + struct pipe_ctx *old_primary_pipe = NULL; + + /* + * Modified from find_idle_secondary_pipe + * With windowed MPO and ODM, we want to avoid the case where we want a + * free pipe for the left side but the free pipe is being used on the + * right side. 
+ * Add check on current_state if the primary_pipe is the left side, + * to check the right side ( primary_pipe->next_odm_pipe ) to see if + * it is using a pipe for MPO ( primary_pipe->next_odm_pipe->bottom_pipe ) + * - If so, then don't use this pipe + * EXCEPTION - 3 plane ( 2 MPO plane ) case + * - in this case, the primary pipe has already gotten a free pipe for the + * MPO window in the left + * - when it tries to get a free pipe for the MPO window on the right, + * it will see that it is already assigned to the right side + * ( primary_pipe->next_odm_pipe ). But in this case, we want this + * free pipe, since it will be for the right side. So add an + * additional condition, that skipping the free pipe on the right only + * applies if the primary pipe has no bottom pipe currently assigned + */ + if (primary_pipe) { + primary_index = primary_pipe->pipe_idx; + old_primary_pipe = &primary_pipe->stream->ctx->dc->current_state->res_ctx.pipe_ctx[primary_index]; + if ((old_primary_pipe->next_odm_pipe) && (old_primary_pipe->next_odm_pipe->bottom_pipe) + && (!primary_pipe->bottom_pipe)) + next_odm_mpo_pipe = old_primary_pipe->next_odm_pipe->bottom_pipe; + + preferred_pipe_idx = (pool->pipe_count - 1) - primary_pipe->pipe_idx; + if ((res_ctx->pipe_ctx[preferred_pipe_idx].stream == NULL) && + !(next_odm_mpo_pipe && next_odm_mpo_pipe->pipe_idx == preferred_pipe_idx)) { + secondary_pipe = &res_ctx->pipe_ctx[preferred_pipe_idx]; + secondary_pipe->pipe_idx = preferred_pipe_idx; + } + } + + /* + * search backwards for the second pipe to keep pipe + * assignment more consistent + */ + if (!secondary_pipe) + for (i = pool->pipe_count - 1; i >= 0; i--) { + if ((res_ctx->pipe_ctx[i].stream == NULL) && + !(next_odm_mpo_pipe && next_odm_mpo_pipe->pipe_idx == i)) { + secondary_pipe = &res_ctx->pipe_ctx[i]; + secondary_pipe->pipe_idx = i; + break; + } + } + + return secondary_pipe; +} + +struct pipe_ctx *dcn32_acquire_idle_pipe_for_head_pipe_in_layer( + struct dc_state *state, + 
const struct resource_pool *pool, + struct dc_stream_state *stream, + struct pipe_ctx *head_pipe) +{ + struct resource_context *res_ctx = &state->res_ctx; + struct pipe_ctx *idle_pipe, *pipe; + struct resource_context *old_ctx = &stream->ctx->dc->current_state->res_ctx; + int head_index; + + if (!head_pipe) + ASSERT(0); + + /* + * Modified from dcn20_acquire_idle_pipe_for_layer + * Check if head_pipe in old_context already has bottom_pipe allocated. + * - If so, check if that pipe is available in the current context. + * -- If so, reuse pipe from old_context + */ + head_index = head_pipe->pipe_idx; + pipe = &old_ctx->pipe_ctx[head_index]; + if (pipe->bottom_pipe && res_ctx->pipe_ctx[pipe->bottom_pipe->pipe_idx].stream == NULL) { + idle_pipe = &res_ctx->pipe_ctx[pipe->bottom_pipe->pipe_idx]; + idle_pipe->pipe_idx = pipe->bottom_pipe->pipe_idx; + } else { + idle_pipe = find_idle_secondary_pipe_check_mpo(res_ctx, pool, head_pipe); + if (!idle_pipe) + return NULL; + } + + idle_pipe->stream = head_pipe->stream; + idle_pipe->stream_res.tg = head_pipe->stream_res.tg; + idle_pipe->stream_res.opp = head_pipe->stream_res.opp; + + idle_pipe->plane_res.hubp = pool->hubps[idle_pipe->pipe_idx]; + idle_pipe->plane_res.ipp = pool->ipps[idle_pipe->pipe_idx]; + idle_pipe->plane_res.dpp = pool->dpps[idle_pipe->pipe_idx]; + idle_pipe->plane_res.mpcc_inst = pool->dpps[idle_pipe->pipe_idx]->inst; + + return idle_pipe; +} diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h index fc0fe48023a0..efd449804d7b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h @@ -99,6 +99,12 @@ bool dcn32_subvp_in_use(struct dc *dc, bool dcn32_mpo_in_use(struct dc_state *context); +struct pipe_ctx *dcn32_acquire_idle_pipe_for_head_pipe_in_layer( + struct dc_state *state, + const struct resource_pool *pool, + struct dc_stream_state *stream, + struct pipe_ctx 
*head_pipe); + void dcn32_determine_det_override(struct dc_state *context, display_e2e_pipe_params_st *pipes, bool *is_pipe_split_expected, int pipe_cnt); -- cgit v1.2.3 From 4a21ab548a7ac19b15aba9aa8df39972ed82f4ad Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Wed, 13 Jul 2022 12:33:37 -0400 Subject: drm/amd/display: Calculate MALL cache lines based on Mblks required [Description] - Calculation for NumWays in MALL should be based on number of MBlks Reviewed-by: Jun Lei Acked-by: Alex Hung Signed-off-by: Alvin Lee Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h | 1 + .../gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h index efd449804d7b..1e7e6201c880 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h @@ -29,6 +29,7 @@ #include "core_types.h" #define DCN3_2_DET_SEG_SIZE 64 +#define DCN3_2_MALL_MBLK_SIZE_BYTES 65536 // 64 * 1024 #define TO_DCN32_RES_POOL(pool)\ container_of(pool, struct dcn32_resource_pool, base) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index 47caa2c6d5b4..266c49884f04 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -51,6 +51,9 @@ uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_stat uint32_t cache_lines_used = 0; uint32_t lines_per_way = 0; uint32_t total_cache_lines = 0; + uint32_t bytes_in_mall = 0; + uint32_t num_mblks = 0; + uint32_t cache_lines_per_plane = 0; uint32_t i = 0; for (i = 0; i < dc->res_pool->pipe_count; i++) { @@ -61,9 +64,19 @@ uint32_t 
dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_stat pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { bytes_per_pixel = pipe->plane_state->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4; mall_region_pixels = pipe->stream->timing.h_addressable * pipe->stream->timing.v_addressable; + + // For bytes required in MALL, calculate based on number of MBlks required + num_mblks = (mall_region_pixels * bytes_per_pixel + + DCN3_2_MALL_MBLK_SIZE_BYTES - 1) / DCN3_2_MALL_MBLK_SIZE_BYTES; + bytes_in_mall = num_mblks * DCN3_2_MALL_MBLK_SIZE_BYTES; // cache lines used is total bytes / cache_line size. Add +2 for worst case alignment // (MALL is 64-byte aligned) - cache_lines_used += (bytes_per_pixel * mall_region_pixels) / dc->caps.cache_line_size + 2; + cache_lines_per_plane = bytes_in_mall / dc->caps.cache_line_size + 2; + + // For DCC we must cache the meta surface, so double cache lines required + if (pipe->plane_state->dcc.enable) + cache_lines_per_plane *= 2; + cache_lines_used += cache_lines_per_plane; } } -- cgit v1.2.3 From f46e3f28f89b893685ec9204405677dac410d8ad Mon Sep 17 00:00:00 2001 From: Taimur Hassan Date: Wed, 13 Jul 2022 17:07:43 -0400 Subject: drm/amd/display: Reset pipe count when iterating for DET override [Why] Coding error in DET allocation was resulting in too few DET segments being allocated, causing underflow. [How] Reset pipe count each time we begin iterating through pipes for a stream. 
Reviewed-by: Alvin Lee Acked-by: Alex Hung Signed-off-by: Taimur Hassan Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index 266c49884f04..b3f8503cea9c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -184,7 +184,8 @@ void dcn32_determine_det_override(struct dc_state *context, display_e2e_pipe_par if (context->stream_count > 0) { stream_segments = 18 / context->stream_count; - for (i = 0, count = 0; i < context->stream_count; i++) { + for (i = 0; i < context->stream_count; i++) { + count = 0; for (j = 0; j < pipe_cnt; j++) { if (context->res_ctx.pipe_ctx[j].stream == context->streams[i]) { count++; -- cgit v1.2.3 From 72bb503097e79113503dc6d1d49c4063ef97970a Mon Sep 17 00:00:00 2001 From: hersen wu Date: Thu, 14 Jul 2022 10:00:28 -0400 Subject: drm/amd/display: Reboot while unplug hdcp enabled dp from mst hub [Why] event_property_update does not check NULL pointer [How] check aconnector->base.state equals NULL Reviewed-by: Bhawanpreet Lakha Acked-by: Alex Hung Signed-off-by: hersen wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c | 34 ++++++++++++++-------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index fbb252afb494..6202e31c7e3a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -302,7 +302,7 @@ static void event_property_update(struct work_struct *work) mutex_lock(&hdcp_work->mutex); - if (aconnector->base.state->commit) { + if 
(aconnector->base.state && aconnector->base.state->commit) { ret = wait_for_completion_interruptible_timeout(&aconnector->base.state->commit->hw_done, 10 * HZ); if (ret == 0) { @@ -311,18 +311,26 @@ static void event_property_update(struct work_struct *work) } } - if (hdcp_work->encryption_status != MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF) { - if (aconnector->base.state->hdcp_content_type == DRM_MODE_HDCP_CONTENT_TYPE0 && - hdcp_work->encryption_status <= MOD_HDCP_ENCRYPTION_STATUS_HDCP2_TYPE0_ON) - drm_hdcp_update_content_protection(&aconnector->base, DRM_MODE_CONTENT_PROTECTION_ENABLED); - else if (aconnector->base.state->hdcp_content_type == DRM_MODE_HDCP_CONTENT_TYPE1 && - hdcp_work->encryption_status == MOD_HDCP_ENCRYPTION_STATUS_HDCP2_TYPE1_ON) - drm_hdcp_update_content_protection(&aconnector->base, DRM_MODE_CONTENT_PROTECTION_ENABLED); - } else { - drm_hdcp_update_content_protection(&aconnector->base, DRM_MODE_CONTENT_PROTECTION_DESIRED); + if (aconnector->base.state) { + if (hdcp_work->encryption_status != MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF) { + if (aconnector->base.state->hdcp_content_type == + DRM_MODE_HDCP_CONTENT_TYPE0 && + hdcp_work->encryption_status <= + MOD_HDCP_ENCRYPTION_STATUS_HDCP2_TYPE0_ON) + drm_hdcp_update_content_protection(&aconnector->base, + DRM_MODE_CONTENT_PROTECTION_ENABLED); + else if (aconnector->base.state->hdcp_content_type == + DRM_MODE_HDCP_CONTENT_TYPE1 && + hdcp_work->encryption_status == + MOD_HDCP_ENCRYPTION_STATUS_HDCP2_TYPE1_ON) + drm_hdcp_update_content_protection(&aconnector->base, + DRM_MODE_CONTENT_PROTECTION_ENABLED); + } else { + drm_hdcp_update_content_protection(&aconnector->base, + DRM_MODE_CONTENT_PROTECTION_DESIRED); + } } - mutex_unlock(&hdcp_work->mutex); drm_modeset_unlock(&dev->mode_config.connection_mutex); } @@ -495,7 +503,9 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) (!!aconnector->base.state) ? aconnector->base.state->content_protection : -1, (!!aconnector->base.state) ? 
aconnector->base.state->hdcp_content_type : -1); - hdcp_update_display(hdcp_work, link_index, aconnector, conn_state->hdcp_content_type, false); + if (conn_state) + hdcp_update_display(hdcp_work, link_index, aconnector, + conn_state->hdcp_content_type, false); } -- cgit v1.2.3 From ee4a26b4ea029aeed749c6560ed9eba224337b04 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 14 Jul 2022 12:29:12 -0400 Subject: drm/amd/display: Drop unnecessary FPU flags on dcn302 files We already isolated the DCN302 code in the DML folder, but we forgot to drop the FPU flags from the Makefile. This commit drops those flags. Reviewed-by: Harry Wentland Acked-by: Alex Hung Signed-off-by: Rodrigo Siqueira Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn302/Makefile | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/Makefile b/drivers/gpu/drm/amd/display/dc/dcn302/Makefile index e4b69ad0dde5..ebd01cb467b7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn302/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn302/Makefile @@ -7,31 +7,6 @@ DCN3_02 = dcn302_init.o dcn302_hwseq.o dcn302_resource.o -ifdef CONFIG_X86 -CFLAGS_$(AMDDALPATH)/dc/dcn302/dcn302_resource.o := -mhard-float -msse -endif - -ifdef CONFIG_PPC64 -CFLAGS_$(AMDDALPATH)/dc/dcn302/dcn302_resource.o := -mhard-float -maltivec -endif - -ifdef CONFIG_X86 -ifdef CONFIG_CC_IS_GCC -ifeq ($(call cc-ifversion, -lt, 0701, y), y) -IS_OLD_GCC = 1 -endif -endif - -ifdef IS_OLD_GCC -# Stack alignment mismatch, proceed with caution. -# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 -# (8B stack alignment). 
-CFLAGS_$(AMDDALPATH)/dc/dcn302/dcn302_resource.o += -mpreferred-stack-boundary=4 -else -CFLAGS_$(AMDDALPATH)/dc/dcn302/dcn302_resource.o += -msse2 -endif -endif - AMD_DAL_DCN3_02 = $(addprefix $(AMDDALPATH)/dc/dcn302/,$(DCN3_02)) AMD_DISPLAY_FILES += $(AMD_DAL_DCN3_02) -- cgit v1.2.3 From a4f1b04216023ff0f4cd89328b59ee6890248130 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Thu, 14 Jul 2022 18:21:28 -0400 Subject: drm/amd/display: Fallback to SW cursor if SubVP + cursor too big [Description] - For SubVP cursor cannot be cached in MALL, therefore we will switch to SW cursor if the cursor size exceeds what can fit in the local DCN buffers (64x64x4) - Returning false / failure for set_cursor_attributes will fallback to SW cursor Reviewed-by: Jun Lei Acked-by: Alex Hung Signed-off-by: Alvin Lee Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_stream.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 6752ca44e6e0..f62d50901d92 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -328,6 +328,11 @@ bool dc_stream_set_cursor_attributes( } dc = stream->ctx->dc; + + if (attributes->height * attributes->width * 4 > 16384) + if (stream->mall_stream_config.type == SUBVP_MAIN) + return false; + stream->cursor_attributes = *attributes; dc_z10_restore(dc); -- cgit v1.2.3 From 6a7379f196230cfa35335ec627e0c0e08da6a8b8 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 14 Jul 2022 15:11:31 -0400 Subject: drm/amd/display: Remove FPU operations from dcn201 resources We have some FPU operations on the resource part of the DCN201. This commit drops FPU flags and moves any required FPU code to the DML folder. 
Reviewed-by: Harry Wentland Acked-by: Alex Hung Signed-off-by: Rodrigo Siqueira Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn201/Makefile | 24 ------ .../drm/amd/display/dc/dcn201/dcn201_resource.c | 10 ++- .../gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c | 87 ++++++++++++++++++++++ .../gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h | 4 + 4 files changed, 100 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/Makefile b/drivers/gpu/drm/amd/display/dc/dcn201/Makefile index 96cbd4ccd344..5c9ce2cebb0f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn201/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn201/Makefile @@ -6,30 +6,6 @@ DCN201 = dcn201_init.o dcn201_resource.o dcn201_hwseq.o \ dcn201_mpc.o dcn201_hubp.o dcn201_opp.o dcn201_optc.o dcn201_dpp.o \ dcn201_dccg.o dcn201_link_encoder.o -ifdef CONFIG_X86 -CFLAGS_$(AMDDALPATH)/dc/dcn201/dcn201_resource.o := -mhard-float -msse -endif - -ifdef CONFIG_PPC64 -CFLAGS_$(AMDDALPATH)/dc/dcn201/dcn201_resource.o := -mhard-float -maltivec -endif - -ifdef CONFIG_CC_IS_GCC -ifeq ($(call cc-ifversion, -lt, 0701, y), y) -IS_OLD_GCC = 1 -endif -endif - -ifdef CONFIG_X86 -ifdef IS_OLD_GCC -# Stack alignment mismatch, proceed with caution. -# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 -# (8B stack alignment). 
-CFLAGS_$(AMDDALPATH)/dc/dcn201/dcn201_resource.o += -mpreferred-stack-boundary=4 -else -CFLAGS_$(AMDDALPATH)/dc/dcn201/dcn201_resource.o += -msse2 -endif -endif AMD_DAL_DCN201 = $(addprefix $(AMDDALPATH)/dc/dcn201/,$(DCN201)) AMD_DISPLAY_FILES += $(AMD_DAL_DCN201) diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c index 0bb7d3dd53fa..e549a79f3fe1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c @@ -1036,6 +1036,14 @@ static bool dcn201_get_dcc_compression_cap(const struct dc *dc, output); } +static void dcn201_populate_dml_writeback_from_context(struct dc *dc, + struct resource_context *res_ctx, + display_e2e_pipe_params_st *pipes) +{ + DC_FP_START(); + dcn201_populate_dml_writeback_from_context_fpu(dc, res_ctx, pipes); + DC_FP_END(); +} static void dcn201_destroy_resource_pool(struct resource_pool **pool) { @@ -1067,8 +1075,8 @@ static struct resource_funcs dcn201_res_pool_funcs = { .add_dsc_to_stream_resource = NULL, .remove_stream_from_ctx = dcn20_remove_stream_from_ctx, .acquire_idle_pipe_for_layer = dcn201_acquire_idle_pipe_for_layer, + .populate_dml_writeback_from_context = dcn201_populate_dml_writeback_from_context, .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, - .populate_dml_writeback_from_context = dcn20_populate_dml_writeback_from_context, .set_mcif_arb_params = dcn20_set_mcif_arb_params, .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link }; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c index eeeae52fe6fc..39428488a052 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c @@ -2303,3 +2303,90 @@ void dcn21_clk_mgr_set_bw_params_wm_table(struct clk_bw_params *bw_params) 
bw_params->wm_table.entries[WM_D].wm_type = WM_TYPE_RETRAINING; bw_params->wm_table.entries[WM_D].valid = true; } + +void dcn201_populate_dml_writeback_from_context_fpu(struct dc *dc, + struct resource_context *res_ctx, + display_e2e_pipe_params_st *pipes) +{ + int pipe_cnt, i, j; + double max_calc_writeback_dispclk; + double writeback_dispclk; + struct writeback_st dout_wb; + + dc_assert_fp_enabled(); + + for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { + struct dc_stream_state *stream = res_ctx->pipe_ctx[i].stream; + + if (!stream) + continue; + max_calc_writeback_dispclk = 0; + + /* Set writeback information */ + pipes[pipe_cnt].dout.wb_enable = 0; + pipes[pipe_cnt].dout.num_active_wb = 0; + for (j = 0; j < stream->num_wb_info; j++) { + struct dc_writeback_info *wb_info = &stream->writeback_info[j]; + + if (wb_info->wb_enabled && wb_info->writeback_source_plane && + (wb_info->writeback_source_plane == res_ctx->pipe_ctx[i].plane_state)) { + pipes[pipe_cnt].dout.wb_enable = 1; + pipes[pipe_cnt].dout.num_active_wb++; + dout_wb.wb_src_height = wb_info->dwb_params.cnv_params.crop_en ? + wb_info->dwb_params.cnv_params.crop_height : + wb_info->dwb_params.cnv_params.src_height; + dout_wb.wb_src_width = wb_info->dwb_params.cnv_params.crop_en ? + wb_info->dwb_params.cnv_params.crop_width : + wb_info->dwb_params.cnv_params.src_width; + dout_wb.wb_dst_width = wb_info->dwb_params.dest_width; + dout_wb.wb_dst_height = wb_info->dwb_params.dest_height; + dout_wb.wb_htaps_luma = wb_info->dwb_params.scaler_taps.h_taps; + dout_wb.wb_vtaps_luma = wb_info->dwb_params.scaler_taps.v_taps;; + dout_wb.wb_htaps_chroma = wb_info->dwb_params.scaler_taps.h_taps_c; + dout_wb.wb_vtaps_chroma = wb_info->dwb_params.scaler_taps.v_taps_c; + dout_wb.wb_hratio = wb_info->dwb_params.cnv_params.crop_en ? 
+ (double)wb_info->dwb_params.cnv_params.crop_width / + (double)wb_info->dwb_params.dest_width : + (double)wb_info->dwb_params.cnv_params.src_width / + (double)wb_info->dwb_params.dest_width; + dout_wb.wb_vratio = wb_info->dwb_params.cnv_params.crop_en ? + (double)wb_info->dwb_params.cnv_params.crop_height / + (double)wb_info->dwb_params.dest_height : + (double)wb_info->dwb_params.cnv_params.src_height / + (double)wb_info->dwb_params.dest_height; + if (wb_info->dwb_params.out_format == dwb_scaler_mode_yuv420) { + if (wb_info->dwb_params.output_depth == DWB_OUTPUT_PIXEL_DEPTH_8BPC) + dout_wb.wb_pixel_format = dm_420_8; + else + dout_wb.wb_pixel_format = dm_420_10; + } else + dout_wb.wb_pixel_format = dm_444_32; + + /* Workaround for cases where multiple writebacks are connected to same plane + * In which case, need to compute worst case and set the associated writeback parameters + * This workaround is necessary due to DML computation assuming only 1 set of writeback + * parameters per pipe */ + writeback_dispclk = CalculateWriteBackDISPCLK( + dout_wb.wb_pixel_format, + pipes[pipe_cnt].pipe.dest.pixel_rate_mhz, + dout_wb.wb_hratio, + dout_wb.wb_vratio, + dout_wb.wb_htaps_luma, + dout_wb.wb_vtaps_luma, + dout_wb.wb_htaps_chroma, + dout_wb.wb_vtaps_chroma, + dout_wb.wb_dst_width, + pipes[pipe_cnt].pipe.dest.htotal, + 2); + + if (writeback_dispclk > max_calc_writeback_dispclk) { + max_calc_writeback_dispclk = writeback_dispclk; + pipes[pipe_cnt].dout.wb = dout_wb; + } + } + } + + pipe_cnt++; + } + +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h index a6e1ad0f38e9..c51badf7b68a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h @@ -84,4 +84,8 @@ void dcn21_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params void dcn21_clk_mgr_set_bw_params_wm_table(struct clk_bw_params *bw_params); +void 
dcn201_populate_dml_writeback_from_context_fpu(struct dc *dc, + struct resource_context *res_ctx, + display_e2e_pipe_params_st *pipes); + #endif /* __DCN20_FPU_H__ */ -- cgit v1.2.3 From 05674cc9ea97079ad7c819628e3e56b046c2f497 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 14 Jul 2022 15:27:58 -0400 Subject: drm/amd/display: Move mclk calculation function to DML The function responsible for calculating the MCLK switching has FPU operations. This commit moves it to the dcn30_fpu file. Reviewed-by: Harry Wentland Acked-by: Alex Hung Signed-off-by: Rodrigo Siqueira Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn30/dcn30_resource.c | 38 ------------------- .../gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c | 43 ++++++++++++++++++++++ .../gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h | 6 +++ 3 files changed, 49 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c index 7f01463942fb..e5e54097a07d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c @@ -2031,44 +2031,6 @@ void dcn30_setup_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, struct context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 4U * 1000U * 1000U * 1000U; } -/* - * Finds dummy_latency_index when MCLK switching using firmware based - * vblank stretch is enabled. 
This function will iterate through the - * table of dummy pstate latencies until the lowest value that allows - * dm_allow_self_refresh_and_mclk_switch to happen is found - */ -int dcn30_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, int pipe_cnt, int vlevel) -{ - const int max_latency_table_entries = 4; - int dummy_latency_index = 0; - - while (dummy_latency_index < max_latency_table_entries) { - context->bw_ctx.dml.soc.dram_clock_change_latency_us = - dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us; - dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false); - - if (context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank == - dm_allow_self_refresh_and_mclk_switch) - break; - - dummy_latency_index++; - } - - if (dummy_latency_index == max_latency_table_entries) { - ASSERT(dummy_latency_index != max_latency_table_entries); - /* If the execution gets here, it means dummy p_states are - * not possible. This should never happen and would mean - * something is severely wrong. - * Here we reset dummy_latency_index to 3, because it is - * better to have underflows than system crashes. - */ - dummy_latency_index = 3; - } - - return dummy_latency_index; -} - void dcn30_update_soc_for_wm_a(struct dc *dc, struct dc_state *context) { DC_FP_START(); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c index c00f759fdded..9e32b45b63dc 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c @@ -616,6 +616,49 @@ void dcn30_fpu_update_bw_bounding_box(struct dc *dc, } +/** + * Finds dummy_latency_index when MCLK switching using firmware based + * vblank stretch is enabled. 
This function will iterate through the + * table of dummy pstate latencies until the lowest value that allows + * dm_allow_self_refresh_and_mclk_switch to happen is found + */ +int dcn30_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel) +{ + const int max_latency_table_entries = 4; + int dummy_latency_index = 0; + + dc_assert_fp_enabled(); + + while (dummy_latency_index < max_latency_table_entries) { + context->bw_ctx.dml.soc.dram_clock_change_latency_us = + dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us; + dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false); + + if (context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank == + dm_allow_self_refresh_and_mclk_switch) + break; + + dummy_latency_index++; + } + + if (dummy_latency_index == max_latency_table_entries) { + ASSERT(dummy_latency_index != max_latency_table_entries); + /* If the execution gets here, it means dummy p_states are + * not possible. This should never happen and would mean + * something is severely wrong. + * Here we reset dummy_latency_index to 3, because it is + * better to have underflows than system crashes. 
+ */ + dummy_latency_index = 3; + } + + return dummy_latency_index; +} + void dcn3_fpu_build_wm_range_table(struct clk_mgr *base) { /* defaults */ diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h index c2024052a497..3e4221af1c1e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h @@ -63,6 +63,12 @@ void dcn30_fpu_update_bw_bounding_box(struct dc *dc, unsigned int *dcfclk_mhz, unsigned int *dram_speed_mts); +int dcn30_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel); + void dcn3_fpu_build_wm_range_table(struct clk_mgr *base); #endif /* __DCN30_FPU_H__*/ -- cgit v1.2.3 From bdf4473394deffc94c16c0502e73f2daabbdc1d0 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 14 Jul 2022 16:13:10 -0400 Subject: drm/amd/display: Create patch bounding box function for isolate FPU In the DCN30 resource, we have a small patch to the bounding box struct; this patch uses FPU operations. This commit moves that specific part to its function under the DML folder. 
Reviewed-by: Harry Wentland Acked-by: Alex Hung Signed-off-by: Rodrigo Siqueira Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn30/dcn30_resource.c | 19 ++----------------- drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c | 20 ++++++++++++++++++++ drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h | 2 ++ 3 files changed, 24 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c index e5e54097a07d..8bdf3573610f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c @@ -1521,26 +1521,11 @@ static bool init_soc_bounding_box(struct dc *dc, loaded_ip->max_num_otg = pool->base.res_cap->num_timing_generator; loaded_ip->max_num_dpp = pool->base.pipe_count; loaded_ip->clamp_min_dcfclk = dc->config.clamp_min_dcfclk; - - DC_FP_START(); dcn20_patch_bounding_box(dc, loaded_bb); + DC_FP_START(); + patch_dcn30_soc_bounding_box(dc, &dcn3_0_soc); DC_FP_END(); - if (dc->ctx->dc_bios->funcs->get_soc_bb_info) { - struct bp_soc_bb_info bb_info = {0}; - - if (dc->ctx->dc_bios->funcs->get_soc_bb_info(dc->ctx->dc_bios, &bb_info) == BP_RESULT_OK) { - if (bb_info.dram_clock_change_latency_100ns > 0) - dcn3_0_soc.dram_clock_change_latency_us = bb_info.dram_clock_change_latency_100ns * 10; - - if (bb_info.dram_sr_enter_exit_latency_100ns > 0) - dcn3_0_soc.sr_enter_plus_exit_time_us = bb_info.dram_sr_enter_exit_latency_100ns * 10; - - if (bb_info.dram_sr_exit_latency_100ns > 0) - dcn3_0_soc.sr_exit_time_us = bb_info.dram_sr_exit_latency_100ns * 10; - } - } - return true; } diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c index 9e32b45b63dc..6dd9a70314c0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c @@ -721,3 +721,23 @@ void 
dcn3_fpu_build_wm_range_table(struct clk_mgr *base) base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_uclk = min_uclk_mhz; base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_uclk = 0xFFFF; } + +void patch_dcn30_soc_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st *dcn3_0_ip) +{ + dc_assert_fp_enabled(); + + if (dc->ctx->dc_bios->funcs->get_soc_bb_info) { + struct bp_soc_bb_info bb_info = {0}; + + if (dc->ctx->dc_bios->funcs->get_soc_bb_info(dc->ctx->dc_bios, &bb_info) == BP_RESULT_OK) { + if (bb_info.dram_clock_change_latency_100ns > 0) + dcn3_0_soc.dram_clock_change_latency_us = bb_info.dram_clock_change_latency_100ns * 10; + + if (bb_info.dram_sr_enter_exit_latency_100ns > 0) + dcn3_0_soc.sr_enter_plus_exit_time_us = bb_info.dram_sr_enter_exit_latency_100ns * 10; + + if (bb_info.dram_sr_exit_latency_100ns > 0) + dcn3_0_soc.sr_exit_time_us = bb_info.dram_sr_exit_latency_100ns * 10; + } + } +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h index 3e4221af1c1e..cab864095ce7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h @@ -71,4 +71,6 @@ int dcn30_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc, void dcn3_fpu_build_wm_range_table(struct clk_mgr *base); +void patch_dcn30_soc_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st *dcn3_0_ip); + #endif /* __DCN30_FPU_H__*/ -- cgit v1.2.3 From 40b31e5355ba8c4f7e58f1c458bbe46763dca541 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 14 Jul 2022 16:32:54 -0400 Subject: drm/amd/display: Remove FPU flags from DCN30 Makefile At this stage, we must have all the FPU code for DCN30 isolated in the DML folder. Drop FPU flags from Makefile. 
Reviewed-by: Harry Wentland Acked-by: Alex Hung Signed-off-by: Rodrigo Siqueira Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn30/Makefile | 30 --------------------------- 1 file changed, 30 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile index c20331eb62e0..b7c2ae9ddfda 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile @@ -30,36 +30,6 @@ DCN30 = dcn30_init.o dcn30_hubbub.o dcn30_hubp.o dcn30_dpp.o dcn30_optc.o \ dcn30_dpp_cm.o dcn30_dwb_cm.o dcn30_cm_common.o dcn30_mmhubbub.o \ dcn30_dio_link_encoder.o dcn30_resource.o - -ifdef CONFIG_X86 -CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o := -mhard-float -msse -CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o := -mhard-float -msse -endif - -ifdef CONFIG_PPC64 -CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o := -mhard-float -maltivec -CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o := -mhard-float -maltivec -endif - -ifdef CONFIG_CC_IS_GCC -ifeq ($(call cc-ifversion, -lt, 0701, y), y) -IS_OLD_GCC = 1 -endif -endif - -ifdef CONFIG_X86 -ifdef IS_OLD_GCC -# Stack alignment mismatch, proceed with caution. -# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 -# (8B stack alignment). 
-CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o += -mpreferred-stack-boundary=4 -CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o += -mpreferred-stack-boundary=4 -else -CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o += -msse2 -CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o += -msse2 -endif -endif - AMD_DAL_DCN30 = $(addprefix $(AMDDALPATH)/dc/dcn30/,$(DCN30)) AMD_DISPLAY_FILES += $(AMD_DAL_DCN30) -- cgit v1.2.3 From cb849b4dc73d414149fea96330cdf96a82919fc9 Mon Sep 17 00:00:00 2001 From: Michael Strauss Date: Tue, 31 May 2022 16:55:32 -0400 Subject: drm/amd/display: Add pixel rate div calcs and programming [WHY/HOW] Need to calculate and set some pixel rate divisors on correct otg_inst Reviewed-by: Nicholas Kazlauskas Acked-by: Alex Hung Signed-off-by: Michael Strauss Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c | 17 ++++++++++ .../gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c | 36 ++++++++++++++++++++++ .../gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h | 2 ++ .../gpu/drm/amd/display/dc/dcn314/dcn314_init.c | 1 + 4 files changed, 56 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c index ea78da9c6f8b..3852a6d59b97 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c @@ -219,6 +219,21 @@ void dccg314_set_dpstreamclk( } } +void dccg314_set_valid_pixel_rate( + struct dccg *dccg, + int ref_dtbclk_khz, + int otg_inst, + int pixclk_khz) +{ + struct dtbclk_dto_params dto_params = {0}; + + dto_params.ref_dtbclk_khz = ref_dtbclk_khz; + dto_params.otg_inst = otg_inst; + dto_params.pixclk_khz = pixclk_khz; + + dccg314_set_dtbclk_dto(dccg, &dto_params); +} + static const struct dccg_funcs dccg314_funcs = { .update_dpp_dto = dccg31_update_dpp_dto, .get_dccg_ref_freq = dccg31_get_dccg_ref_freq, @@ -237,6 +252,8 @@ static const struct dccg_funcs dccg314_funcs = { 
.set_dispclk_change_mode = dccg31_set_dispclk_change_mode, .disable_dsc = dccg31_disable_dscclk, .enable_dsc = dccg31_enable_dscclk, + .set_pixel_rate_div = dccg314_set_pixel_rate_div, + .set_valid_pixel_rate = dccg314_set_valid_pixel_rate, }; struct dccg *dccg314_create( diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c index 90ec76487264..755c715ad8dc 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c @@ -338,3 +338,39 @@ void dcn314_enable_power_gating_plane(struct dce_hwseq *hws, bool enable) if (org_ip_request_cntl == 0) REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 0); } + +unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsigned int *k1_div, unsigned int *k2_div) +{ + struct dc_stream_state *stream = pipe_ctx->stream; + unsigned int odm_combine_factor = 0; + + odm_combine_factor = get_odm_config(pipe_ctx, NULL); + + if (is_dp_128b_132b_signal(pipe_ctx)) { + *k2_div = PIXEL_RATE_DIV_BY_1; + } else if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal) || dc_is_dvi_signal(pipe_ctx->stream->signal)) { + *k1_div = PIXEL_RATE_DIV_BY_1; + if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) + *k2_div = PIXEL_RATE_DIV_BY_2; + else + *k2_div = PIXEL_RATE_DIV_BY_4; + } else if (dc_is_dp_signal(pipe_ctx->stream->signal)) { + if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) { + *k1_div = PIXEL_RATE_DIV_BY_1; + *k2_div = PIXEL_RATE_DIV_BY_2; + } else if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR422) { + *k1_div = PIXEL_RATE_DIV_BY_2; + *k2_div = PIXEL_RATE_DIV_BY_2; + } else { + if (odm_combine_factor == 1) + *k2_div = PIXEL_RATE_DIV_BY_4; + else if (odm_combine_factor == 2) + *k2_div = PIXEL_RATE_DIV_BY_2; + } + } + + if ((*k1_div == PIXEL_RATE_DIV_NA) && (*k2_div == PIXEL_RATE_DIV_NA)) + ASSERT(false); + + return odm_combine_factor; +} diff --git 
a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h index dfdd0b792a52..be0f5e4d48e1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h @@ -37,4 +37,6 @@ void dcn314_dsc_pg_control(struct dce_hwseq *hws, unsigned int dsc_inst, bool po void dcn314_enable_power_gating_plane(struct dce_hwseq *hws, bool enable); +unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsigned int *k1_div, unsigned int *k2_div); + #endif /* __DC_HWSS_DCN314_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c index c87b1979b2cc..b9debeb081fd 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c @@ -144,6 +144,7 @@ static const struct hwseq_private_funcs dcn314_private_funcs = { .set_blend_lut = dcn30_set_blend_lut, .set_shaper_3dlut = dcn20_set_shaper_3dlut, .setup_hpo_hw_control = dcn31_setup_hpo_hw_control, + .calculate_dccg_k1_k2_values = dcn314_calculate_dccg_k1_k2_values, }; void dcn314_hw_sequencer_construct(struct dc *dc) -- cgit v1.2.3 From 1c5a2fa97b91d37375f4fc8aeb37c9456c93c828 Mon Sep 17 00:00:00 2001 From: Michael Strauss Date: Thu, 9 Jun 2022 10:45:34 -0400 Subject: drm/amd/display: Use correct DTO_SRC_SEL for 128b/132b encoding [WHY] DP DTO isn't used for 128b/132b encoding [HOW] Check current link rate to determine whether using 8b/10b or 128/132b encoding Reviewed-by: Nicholas Kazlauskas Acked-by: Alex Hung Signed-off-by: Michael Strauss Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 1 + .../gpu/drm/amd/display/dc/dce/dce_clock_source.c | 20 +++++++++++++++----- .../drm/amd/display/dc/dce110/dce110_hw_sequencer.c | 1 + .../drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c | 1 + 
drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c | 1 + drivers/gpu/drm/amd/display/dc/inc/clock_source.h | 7 +++++-- 6 files changed, 24 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index dfc74aea2852..48dad093ae8b 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -7064,6 +7064,7 @@ void dp_enable_link_phy( pipes[i].clock_source->funcs->program_pix_clk( pipes[i].clock_source, &pipes[i].stream_res.pix_clk_params, + dp_get_link_encoding_format(link_settings), &pipes[i].pll_settings); } } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c index d55da1ab1ac2..213de8cabfad 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c @@ -838,6 +838,7 @@ static void dce112_program_pixel_clk_resync( static bool dce110_program_pix_clk( struct clock_source *clock_source, struct pixel_clk_params *pix_clk_params, + enum dp_link_encoding encoding, struct pll_settings *pll_settings) { struct dce110_clk_src *clk_src = TO_DCE110_CLK_SRC(clock_source); @@ -911,6 +912,7 @@ static bool dce110_program_pix_clk( static bool dce112_program_pix_clk( struct clock_source *clock_source, struct pixel_clk_params *pix_clk_params, + enum dp_link_encoding encoding, struct pll_settings *pll_settings) { struct dce110_clk_src *clk_src = TO_DCE110_CLK_SRC(clock_source); @@ -970,6 +972,7 @@ static bool dce112_program_pix_clk( static bool dcn31_program_pix_clk( struct clock_source *clock_source, struct pixel_clk_params *pix_clk_params, + enum dp_link_encoding encoding, struct pll_settings *pll_settings) { struct dce110_clk_src *clk_src = TO_DCE110_CLK_SRC(clock_source); @@ -993,9 +996,14 @@ static bool dcn31_program_pix_clk( #if defined(CONFIG_DRM_AMD_DC_DCN) /* Enable DTO */ if 
(clk_src->cs_mask->PIPE0_DTO_SRC_SEL) - REG_UPDATE_2(PIXEL_RATE_CNTL[inst], - DP_DTO0_ENABLE, 1, - PIPE0_DTO_SRC_SEL, 1); + if (encoding == DP_128b_132b_ENCODING) + REG_UPDATE_2(PIXEL_RATE_CNTL[inst], + DP_DTO0_ENABLE, 1, + PIPE0_DTO_SRC_SEL, 2); + else + REG_UPDATE_2(PIXEL_RATE_CNTL[inst], + DP_DTO0_ENABLE, 1, + PIPE0_DTO_SRC_SEL, 1); else REG_UPDATE(PIXEL_RATE_CNTL[inst], DP_DTO0_ENABLE, 1); @@ -1198,12 +1206,13 @@ const struct pixel_rate_range_table_entry *look_up_in_video_optimized_rate_tlb( static bool dcn20_program_pix_clk( struct clock_source *clock_source, struct pixel_clk_params *pix_clk_params, + enum dp_link_encoding encoding, struct pll_settings *pll_settings) { struct dce110_clk_src *clk_src = TO_DCE110_CLK_SRC(clock_source); unsigned int inst = pix_clk_params->controller_id - CONTROLLER_ID_D0; - dce112_program_pix_clk(clock_source, pix_clk_params, pll_settings); + dce112_program_pix_clk(clock_source, pix_clk_params, encoding, pll_settings); if (clock_source->ctx->dc->hwss.enable_vblanks_synchronization && clock_source->ctx->dc->config.vblank_alignment_max_frame_time_diff > 0) { @@ -1243,6 +1252,7 @@ static const struct clock_source_funcs dcn20_clk_src_funcs = { static bool dcn3_program_pix_clk( struct clock_source *clock_source, struct pixel_clk_params *pix_clk_params, + enum dp_link_encoding encoding, struct pll_settings *pll_settings) { struct dce110_clk_src *clk_src = TO_DCE110_CLK_SRC(clock_source); @@ -1265,7 +1275,7 @@ static bool dcn3_program_pix_clk( REG_UPDATE(PIXEL_RATE_CNTL[inst], DP_DTO0_ENABLE, 1); } else // For other signal types(HDMI_TYPE_A, DVI) Driver still to call VBIOS Command table - dce112_program_pix_clk(clock_source, pix_clk_params, pll_settings); + dce112_program_pix_clk(clock_source, pix_clk_params, encoding, pll_settings); return true; } diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index e69c942c8345..38a67051d470 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1435,6 +1435,7 @@ static enum dc_status dce110_enable_stream_timing( if (false == pipe_ctx->clock_source->funcs->program_pix_clk( pipe_ctx->clock_source, &pipe_ctx->stream_res.pix_clk_params, + dp_get_link_encoding_format(&pipe_ctx->link_config.dp_link_settings), &pipe_ctx->pll_settings)) { BREAK_TO_DEBUGGER(); return DC_ERROR_UNEXPECTED; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 7a3812604e4b..bed783747f16 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -892,6 +892,7 @@ enum dc_status dcn10_enable_stream_timing( if (false == pipe_ctx->clock_source->funcs->program_pix_clk( pipe_ctx->clock_source, &pipe_ctx->stream_res.pix_clk_params, + dp_get_link_encoding_format(&pipe_ctx->link_config.dp_link_settings), &pipe_ctx->pll_settings)) { BREAK_TO_DEBUGGER(); return DC_ERROR_UNEXPECTED; diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 3b26962637d0..3e44b7998429 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -700,6 +700,7 @@ enum dc_status dcn20_enable_stream_timing( if (false == pipe_ctx->clock_source->funcs->program_pix_clk( pipe_ctx->clock_source, &pipe_ctx->stream_res.pix_clk_params, + dp_get_link_encoding_format(&pipe_ctx->link_config.dp_link_settings), &pipe_ctx->pll_settings)) { BREAK_TO_DEBUGGER(); return DC_ERROR_UNEXPECTED; diff --git a/drivers/gpu/drm/amd/display/dc/inc/clock_source.h b/drivers/gpu/drm/amd/display/dc/inc/clock_source.h index e2b3a2c7a927..8f8ac8e29ed0 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/clock_source.h +++ b/drivers/gpu/drm/amd/display/dc/inc/clock_source.h @@ -160,8 +160,11 
@@ struct calc_pll_clock_source { struct clock_source_funcs { bool (*cs_power_down)( struct clock_source *); - bool (*program_pix_clk)(struct clock_source *, - struct pixel_clk_params *, struct pll_settings *); + bool (*program_pix_clk)( + struct clock_source *, + struct pixel_clk_params *, + enum dp_link_encoding encoding, + struct pll_settings *); uint32_t (*get_pix_clk_dividers)( struct clock_source *, struct pixel_clk_params *, -- cgit v1.2.3 From 0d60f2b47d8312d6a88454a373990579624edbde Mon Sep 17 00:00:00 2001 From: Michael Strauss Date: Thu, 9 Jun 2022 10:48:43 -0400 Subject: drm/amd/display: Use correct clock source constructor for DCN314 [WHY] Previously was pointing to DCN3 clock constructor rather than DCN31's Reviewed-by: Nicholas Kazlauskas Acked-by: Alex Hung Signed-off-by: Michael Strauss Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c index 56ada096c89d..b5672fb90236 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c @@ -1782,7 +1782,7 @@ static struct clock_source *dcn31_clock_source_create( if (!clk_src) return NULL; - if (dcn3_clk_src_construct(clk_src, ctx, bios, id, + if (dcn31_clk_src_construct(clk_src, ctx, bios, id, regs, &cs_shift, &cs_mask)) { clk_src->base.dp_clk_src = dp_clk_src; return &clk_src->base; -- cgit v1.2.3 From 5ade1b951dec63a81b23c2ff75c5b4c191977620 Mon Sep 17 00:00:00 2001 From: Michael Strauss Date: Thu, 9 Jun 2022 10:52:52 -0400 Subject: drm/amd/display: Add OTG/ODM functions [WHY] Required for correct OTG_H_TIMING_CNTL programming Reviewed-by: Nicholas Kazlauskas Acked-by: Alex Hung Signed-off-by: Michael Strauss Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- 
.../gpu/drm/amd/display/dc/dcn314/dcn314_optc.c | 34 ++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c index 3011f9e2f35c..0c7980266b85 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c @@ -160,6 +160,37 @@ void optc314_phantom_crtc_post_enable(struct timing_generator *optc) REG_WAIT(OTG_CLOCK_CONTROL, OTG_BUSY, 0, 1, 100000); } +static void optc314_set_odm_bypass(struct timing_generator *optc, + const struct dc_crtc_timing *dc_crtc_timing) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + enum h_timing_div_mode h_div = H_TIMING_NO_DIV; + + REG_SET_5(OPTC_DATA_SOURCE_SELECT, 0, + OPTC_NUM_OF_INPUT_SEGMENT, 0, + OPTC_SEG0_SRC_SEL, optc->inst, + OPTC_SEG1_SRC_SEL, 0xf, + OPTC_SEG2_SRC_SEL, 0xf, + OPTC_SEG3_SRC_SEL, 0xf + ); + + h_div = optc1_is_two_pixels_per_containter(dc_crtc_timing); + REG_UPDATE(OTG_H_TIMING_CNTL, + OTG_H_TIMING_DIV_MODE, h_div); + + REG_SET(OPTC_MEMORY_CONFIG, 0, + OPTC_MEM_SEL, 0); + optc1->opp_count = 1; +} + +static void optc314_set_h_timing_div_manual_mode(struct timing_generator *optc, bool manual_mode) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_UPDATE(OTG_H_TIMING_CNTL, + OTG_H_TIMING_DIV_MODE_MANUAL, manual_mode ? 
1 : 0); +} + static struct timing_generator_funcs dcn314_tg_funcs = { .validate_timing = optc1_validate_timing, @@ -222,6 +253,9 @@ static struct timing_generator_funcs dcn314_tg_funcs = { .setup_manual_trigger = optc2_setup_manual_trigger, .get_hw_timing = optc1_get_hw_timing, .init_odm = optc3_init_odm, + .set_odm_bypass = optc314_set_odm_bypass, + .set_odm_combine = optc314_set_odm_combine, + .set_h_timing_div_manual_mode = optc314_set_h_timing_div_manual_mode, }; void dcn314_timing_generator_init(struct optc *optc1) -- cgit v1.2.3 From b5e924bd7cce823e604f9b7834fac846a910331c Mon Sep 17 00:00:00 2001 From: Michael Strauss Date: Thu, 9 Jun 2022 11:02:15 -0400 Subject: drm/amd/display: Add missing AUDIO_DTO_SEL reg field [WHY] Needed to program audio dto Reviewed-by: Nicholas Kazlauskas Acked-by: Alex Hung Signed-off-by: Michael Strauss Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.h b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.h index 99ba597bf9b7..9a4a9efc0203 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.h @@ -147,7 +147,8 @@ DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P2_EN, mask_sh),\ DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P3_SRC_SEL, mask_sh),\ DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P3_EN, mask_sh),\ - DCCG_SF(DCCG_AUDIO_DTO_SOURCE, DCCG_AUDIO_DTO0_SOURCE_SEL, mask_sh) + DCCG_SF(DCCG_AUDIO_DTO_SOURCE, DCCG_AUDIO_DTO0_SOURCE_SEL, mask_sh),\ + DCCG_SF(DCCG_AUDIO_DTO_SOURCE, DCCG_AUDIO_DTO_SEL, mask_sh) struct dccg *dccg314_create( struct dc_context *ctx, -- cgit v1.2.3 From 1f5dcb7365e4e0a8a6fca44352218ccc6f41a8b9 Mon Sep 17 00:00:00 2001 From: Michael Strauss Date: Fri, 10 Jun 2022 16:28:03 -0400 Subject: drm/amd/display: Fix dpstreamclk programming [WHY] Currently programming incorrect hpo inst as well as selecting incorrect 
source [HOW] Use hpo inst instead of otg inst to select dpstreamclk inst Reviewed-by: Nicholas Kazlauskas Acked-by: Alex Hung Signed-off-by: Michael Strauss Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c | 8 +++++--- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.h | 8 ++------ drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c | 13 +++++++------ drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c | 3 ++- drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h | 3 ++- drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c | 4 ++-- 6 files changed, 20 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c index 799a383a2684..7f34418e6308 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c @@ -158,9 +158,11 @@ static void dccg31_disable_dpstreamclk(struct dccg *dccg, int otg_inst) } } -void dccg31_set_dpstreamclk(struct dccg *dccg, - enum streamclk_source src, - int otg_inst) +void dccg31_set_dpstreamclk( + struct dccg *dccg, + enum streamclk_source src, + int otg_inst, + int dp_hpo_inst) { if (src == REFCLK) dccg31_disable_dpstreamclk(dccg, otg_inst); diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.h index 32b5593b1460..0902ce5eb8a1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.h @@ -161,11 +161,6 @@ struct dccg *dccg31_create( void dccg31_init(struct dccg *dccg); -void dccg31_set_dpstreamclk( - struct dccg *dccg, - enum streamclk_source src, - int otg_inst); - void dccg31_enable_symclk32_se( struct dccg *dccg, int hpo_se_inst, @@ -207,7 +202,8 @@ void dccg31_get_dccg_ref_freq( void dccg31_set_dpstreamclk( struct dccg *dccg, enum streamclk_source src, - int otg_inst); + int otg_inst, + int dp_hpo_inst); void 
dccg31_set_dtbclk_dto( struct dccg *dccg, diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c index 3852a6d59b97..232cc15979dd 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c @@ -184,7 +184,8 @@ void dccg314_set_dtbclk_dto( void dccg314_set_dpstreamclk( struct dccg *dccg, enum streamclk_source src, - int otg_inst) + int otg_inst, + int dp_hpo_inst) { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); @@ -192,26 +193,26 @@ void dccg314_set_dpstreamclk( dccg314_set_dtbclk_p_src(dccg, src, otg_inst); /* enabled to select one of the DTBCLKs for pipe */ - switch (otg_inst) { + switch (dp_hpo_inst) { case 0: REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK0_EN, (src == REFCLK) ? 0 : 1, - DPSTREAMCLK0_SRC_SEL, 0); + DPSTREAMCLK0_SRC_SEL, otg_inst); break; case 1: REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK1_EN, (src == REFCLK) ? 0 : 1, - DPSTREAMCLK1_SRC_SEL, 1); + DPSTREAMCLK1_SRC_SEL, otg_inst); break; case 2: REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK2_EN, (src == REFCLK) ? 0 : 1, - DPSTREAMCLK2_SRC_SEL, 2); + DPSTREAMCLK2_SRC_SEL, otg_inst); break; case 3: REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK3_EN, (src == REFCLK) ? 
0 : 1, - DPSTREAMCLK3_SRC_SEL, 3); + DPSTREAMCLK3_SRC_SEL, otg_inst); break; default: BREAK_TO_DEBUGGER(); diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c index 12fc3afd9acd..a31c64b50410 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c @@ -211,7 +211,8 @@ static void dccg32_get_dccg_ref_freq(struct dccg *dccg, void dccg32_set_dpstreamclk( struct dccg *dccg, enum streamclk_source src, - int otg_inst) + int otg_inst, + int dp_hpo_inst) { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h index c2d116cce119..ce006762f257 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h @@ -101,7 +101,8 @@ struct dccg_funcs { void (*set_dpstreamclk)( struct dccg *dccg, enum streamclk_source src, - int otg_inst); + int otg_inst, + int dp_hpo_inst); void (*enable_symclk32_se)( struct dccg *dccg, diff --git a/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c b/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c index ea6cf8bfce30..db7b0b155374 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c @@ -116,7 +116,7 @@ static void setup_hpo_dp_stream_encoder(struct pipe_ctx *pipe_ctx) dto_params.timing = &pipe_ctx->stream->timing; dto_params.ref_dtbclk_khz = dc->clk_mgr->funcs->get_dtb_ref_clk_frequency(dc->clk_mgr); - dccg->funcs->set_dpstreamclk(dccg, DTBCLK0, tg->inst); + dccg->funcs->set_dpstreamclk(dccg, DTBCLK0, tg->inst, link_enc->inst); dccg->funcs->enable_symclk32_se(dccg, stream_enc->inst, phyd32clk); dccg->funcs->set_dtbclk_dto(dccg, &dto_params); stream_enc->funcs->enable_stream(stream_enc); @@ -137,7 +137,7 @@ static void reset_hpo_dp_stream_encoder(struct pipe_ctx *pipe_ctx) 
stream_enc->funcs->disable(stream_enc); dccg->funcs->set_dtbclk_dto(dccg, &dto_params); dccg->funcs->disable_symclk32_se(dccg, stream_enc->inst); - dccg->funcs->set_dpstreamclk(dccg, REFCLK, tg->inst); + dccg->funcs->set_dpstreamclk(dccg, REFCLK, tg->inst, pipe_ctx->link_res.hpo_dp_link_enc->inst); } static void setup_hpo_dp_stream_attribute(struct pipe_ctx *pipe_ctx) -- cgit v1.2.3 From 81f776b670a0eb17bf9cfbfd2df32f66a34bc453 Mon Sep 17 00:00:00 2001 From: Anthony Koo Date: Sat, 16 Jul 2022 23:14:01 -0400 Subject: drm/amd/display: Add support for manual DMUB FAMS trigger - Add is_drr parameter to indicate DRR is enabled on the panel to determine whether SubVP MCLK switch logic should be enabled - Add DRR manual trigger in FW (instead of driver) because manual trigger programming triggers DRR update pending and can block SubVP MCLK switches from taking place Acked-by: Alex Hung Signed-off-by: Anthony Koo Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index de193636d022..d7f3619352f0 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -976,7 +976,8 @@ struct dmub_cmd_fw_assisted_mclk_switch_pipe_data_v2 { uint16_t vtotal; uint8_t main_pipe_index; uint8_t phantom_pipe_index; - uint8_t padding[2]; + uint8_t is_drr; + uint8_t padding; } subvp_data; struct { @@ -1579,6 +1580,12 @@ enum dmub_cmd_fams_type { DMUB_CMD__FAMS_SETUP_FW_CTRL = 0, DMUB_CMD__FAMS_DRR_UPDATE = 1, DMUB_CMD__HANDLE_SUBVP_CMD = 2, // specifically for SubVP cmd + /** + * For SubVP set manual trigger in FW because it + * triggers DRR_UPDATE_PENDING which SubVP relies + * on (for any SubVP cases that use a DRR display) + */ + DMUB_CMD__FAMS_SET_MANUAL_TRIGGER = 3, }; /** -- cgit v1.2.3 From 
4074f96d0f2db2fcbd8e223aebe60ca445b8aeae Mon Sep 17 00:00:00 2001 From: Chris Park Date: Tue, 28 Jun 2022 12:36:04 -0400 Subject: drm/amd/display: Cache cursor when cursor exceeds 64x64 [Why] When Static screen from MALL, the cursor needs to be cached if cursor exceeds 64x64 size. [How] Program the bit that cache cursor in MALL when size of the cursor exceeds 64x64. Reviewed-by: Jun Lei Acked-by: Alan Liu Acked-by: Alex Hung Signed-off-by: Chris Park Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c | 4 ++-- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h | 2 +- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c | 9 +++++++-- drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h | 2 +- 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c index 3176b04a7740..6ec1c52535b9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c @@ -47,13 +47,13 @@ void hubp32_update_force_pstate_disallow(struct hubp *hubp, bool pstate_disallow DATA_UCLK_PSTATE_FORCE_VALUE, 0); } -void hubp32_update_mall_sel(struct hubp *hubp, uint32_t mall_sel) +void hubp32_update_mall_sel(struct hubp *hubp, uint32_t mall_sel, bool c_cursor) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); // Also cache cursor in MALL if using MALL for SS REG_UPDATE_2(DCHUBP_MALL_CONFIG, USE_MALL_SEL, mall_sel, - USE_MALL_FOR_CURSOR, mall_sel == 2 ? 
1 : 0); + USE_MALL_FOR_CURSOR, c_cursor); } void hubp32_prepare_subvp_buffering(struct hubp *hubp, bool enable) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h index c4315d50fbb0..56ef71151536 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h @@ -52,7 +52,7 @@ void hubp32_update_force_pstate_disallow(struct hubp *hubp, bool pstate_disallow); -void hubp32_update_mall_sel(struct hubp *hubp, uint32_t mall_sel); +void hubp32_update_mall_sel(struct hubp *hubp, uint32_t mall_sel, bool c_cursor); void hubp32_prepare_subvp_buffering(struct hubp *hubp, bool enable); diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c index bf9ac9dfc7dd..1f845e9ac406 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c @@ -670,18 +670,23 @@ void dcn32_update_mall_sel(struct dc *dc, struct dc_state *context) { int i; unsigned int num_ways = dcn32_calculate_cab_allocation(dc, context); + bool cache_cursor = false; for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; struct hubp *hubp = pipe->plane_res.hubp; if (pipe->stream && pipe->plane_state && hubp && hubp->funcs->hubp_update_mall_sel) { + if (hubp->curs_attr.width * hubp->curs_attr.height * 4 > 16384) + cache_cursor = true; + if (pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { - hubp->funcs->hubp_update_mall_sel(hubp, 1); + hubp->funcs->hubp_update_mall_sel(hubp, 1, false); } else { hubp->funcs->hubp_update_mall_sel(hubp, num_ways <= dc->caps.cache_num_ways && - pipe->stream->link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED ? 2 : 0); + pipe->stream->link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED ? 
2 : 0, + cache_cursor); } } } diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h index 906818e792dd..44c4578193a3 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h @@ -198,7 +198,7 @@ struct hubp_funcs { void (*hubp_soft_reset)(struct hubp *hubp, bool reset); void (*hubp_update_force_pstate_disallow)(struct hubp *hubp, bool allow); - void (*hubp_update_mall_sel)(struct hubp *hubp, uint32_t mall_sel); + void (*hubp_update_mall_sel)(struct hubp *hubp, uint32_t mall_sel, bool c_cursor); void (*hubp_prepare_subvp_buffering)(struct hubp *hubp, bool enable); void (*hubp_set_flip_int)(struct hubp *hubp); -- cgit v1.2.3 From e216431b63aef8b7d9cf6e59aea39582d48b1808 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Fri, 1 Apr 2022 15:29:21 -0400 Subject: drm/amd/display: Add dc_ctx to link_enc_create() parameters [Why&How] Preparation to enable run time initialization of register offsets to add dc_context to the link_enc_create callback. This is needed to get the dc_ctx handle where register offset initialization routine is called. 
Reviewed-by: Rodrigo Siqueira Acked-by: Alan Liu Acked-by: Alex Hung Signed-off-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 2 +- drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h | 1 + drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c | 4 +++- drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c | 4 +++- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c | 1 + drivers/gpu/drm/amd/display/dc/inc/core_types.h | 1 + 23 files changed, 27 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index ef54b96affa8..9e51338441d0 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -1703,7 +1703,7 @@ static bool dc_link_construct_legacy(struct dc_link *link, enc_init_data.transmitter = 
translate_encoder_to_transmitter(enc_init_data.encoder); link->link_enc = - link->dc->res_pool->funcs->link_enc_create(&enc_init_data); + link->dc->res_pool->funcs->link_enc_create(dc_ctx, &enc_init_data); if (!link->link_enc) { DC_ERROR("Failed to create link encoder!\n"); diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c index 9ad8ad4550d9..54805802cbd5 100644 --- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c @@ -612,6 +612,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dce100_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dce110_link_encoder *enc110 = diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c index 41804059550f..f808315b2835 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c @@ -660,6 +660,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dce110_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dce110_link_encoder *enc110 = diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c index 62da6bc3094d..e179e80667d1 100644 --- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c @@ -618,6 +618,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dce112_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dce110_link_encoder *enc110 = diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c 
b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c index c4353a03b48a..1b70b78e2fa1 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c @@ -697,6 +697,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dce120_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dce110_link_encoder *enc110 = diff --git a/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c b/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c index dcfa0a3efa00..fc6aa098bda0 100644 --- a/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c @@ -710,6 +710,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dce60_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dce110_link_encoder *enc110 = diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c index 0c3695e79652..b28025960050 100644 --- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c @@ -713,6 +713,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dce80_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dce110_link_encoder *enc110 = diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index e75be799012e..174eebbe8b4f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -740,6 +740,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dcn10_link_encoder_create( + struct dc_context 
*ctx, const struct encoder_init_data *enc_init_data) { struct dcn10_link_encoder *enc10 = diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 1483de85a524..621767e994bd 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -925,6 +925,7 @@ static const struct encoder_feature_support link_enc_feature = { }; struct link_encoder *dcn20_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dcn20_link_encoder *enc20 = diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h index 7cbe1e9daa36..da0241e8c255 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h @@ -50,6 +50,7 @@ struct resource_pool *dcn20_create_resource_pool( struct dc *dc); struct link_encoder *dcn20_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data); unsigned int dcn20_calc_max_scaled_time( diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c index e549a79f3fe1..407d995bfa99 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c @@ -788,6 +788,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dcn201_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dcn20_link_encoder *enc20 = diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index d95875952fba..7cb35bb1c0f1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -1325,6 +1325,7 @@ 
static int map_transmitter_id_to_phy_instance( } static struct link_encoder *dcn21_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dcn21_link_encoder *enc21 = diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c index 8bdf3573610f..64320e0ca446 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c @@ -927,6 +927,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dcn30_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dcn20_link_encoder *enc20 = diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c index a5df74110284..db172677d613 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c @@ -890,6 +890,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dcn301_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dcn20_link_encoder *enc20 = diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c index f537888f4fa6..4fab537e822f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c @@ -891,7 +891,9 @@ static const struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[] = { hpd_regs(4) }; -static struct link_encoder *dcn302_link_encoder_create(const struct encoder_init_data *enc_init_data) +static struct link_encoder *dcn302_link_encoder_create( + struct dc_context *ctx, + const struct encoder_init_data *enc_init_data) { struct dcn20_link_encoder *enc20 = kzalloc(sizeof(struct 
dcn20_link_encoder), GFP_KERNEL); diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c index 76f863eb86ef..0a67f8a5656d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c @@ -819,7 +819,9 @@ static const struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[] = { hpd_regs(1) }; -static struct link_encoder *dcn303_link_encoder_create(const struct encoder_init_data *enc_init_data) +static struct link_encoder *dcn303_link_encoder_create( + struct dc_context *ctx, + const struct encoder_init_data *enc_init_data) { struct dcn20_link_encoder *enc20 = kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL); diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 6d25fcf865bf..468a893ff785 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -1093,6 +1093,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dcn31_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dcn20_link_encoder *enc20 = diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c index b5672fb90236..63861cdfb09f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c @@ -1262,6 +1262,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dcn31_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dcn20_link_encoder *enc20 = diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c index 
1a5f5977f962..7463b12ae4a3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c @@ -1089,6 +1089,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dcn31_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dcn20_link_encoder *enc20 = diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c index 53dea466348f..d56a212e065c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c @@ -1088,6 +1088,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dcn31_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dcn20_link_encoder *enc20 = diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index 314dec5712b5..bb1d880eee1e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -1117,6 +1117,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dcn32_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dcn20_link_encoder *enc20 = diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c index 9ac0fcf79bed..73d2a653a279 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c @@ -1119,6 +1119,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dcn321_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data 
*enc_init_data) { struct dcn20_link_encoder *enc20 = diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index e4b4102b1538..b3d0a4ea2446 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -96,6 +96,7 @@ struct resource_funcs { struct panel_cntl*(*panel_cntl_create)( const struct panel_cntl_init_data *panel_cntl_init_data); struct link_encoder *(*link_enc_create)( + struct dc_context *ctx, const struct encoder_init_data *init); /* Create a minimal link encoder object with no dc_link object * associated with it. */ -- cgit v1.2.3 From 6290ba4c1656ce2374996b2ed11c3997ebbc4af5 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Tue, 19 Apr 2022 11:22:17 -0400 Subject: drm/amd/display: Add reinstate dram in the FPO logic In order to handle FPO correctly, we need to reinstate the dram values. This function adds the required code to handle the vblank stretch and the dram calculation. 
Acked-by: Alex Hung Signed-off-by: Rodrigo Siqueira Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn30/dcn30_resource.h | 3 + .../gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 67 ++++++++++++++++++---- 2 files changed, 60 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h index 3330a1026fa5..7d063c7d6a4b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h @@ -99,6 +99,9 @@ enum dc_status dcn30_add_stream_to_ctx( void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params); +bool dcn30_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, struct dc_state *context); void dcn30_setup_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, struct dc_state *context); +int dcn30_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, int pipe_cnt, int vlevel); #endif /* _DCN30_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 9175fe1f9be3..01e272f0bb05 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -30,6 +30,7 @@ #include "display_mode_vba_util_32.h" // We need this includes for WATERMARKS_* defines #include "clk_mgr/dcn32/dcn32_smu13_driver_if.h" +#include "dcn30/dcn30_resource.h" #define DC_LOGGER_INIT(logger) @@ -1597,6 +1598,10 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, double dcfclk_from_validation = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] != dm_dram_clock_change_unsupported; + unsigned int 
dummy_latency_index = 0; + int maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb; + unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed; + unsigned int min_dram_speed_mts_margin; dc_assert_fp_enabled(); @@ -1606,6 +1611,32 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, pstate_en = true; } + context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = false; + + if (!pstate_en) { + /* only when the mclk switch can not be natural, is the fw based vblank stretch attempted */ + context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = + dcn30_can_support_mclk_switch_using_fw_based_vblank_stretch(dc, context); + + if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) { + dummy_latency_index = dcn30_find_dummy_latency_index_for_fw_based_mclk_switch(dc, + context, pipes, pipe_cnt, vlevel); + + /* After calling dcn30_find_dummy_latency_index_for_fw_based_mclk_switch + * we reinstate the original dram_clock_change_latency_us on the context + * and all variables that may have changed up to this point, except the + * newly found dummy_latency_index + */ + context->bw_ctx.dml.soc.dram_clock_change_latency_us = + dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; + dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false); + maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb; + dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; + pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] != + dm_dram_clock_change_unsupported; + } + } + /* Set B: * For Set B calculations use clocks from clock_limits[2] when available i.e. 
when SMU is present, * otherwise use arbitrary low value from spreadsheet for DCFCLK as lower is safer for watermark @@ -1687,19 +1718,33 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz; if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) { - unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed; - unsigned int min_dram_speed_mts_margin = 160; + min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed; + min_dram_speed_mts_margin = 160; - if (context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] == - dm_dram_clock_change_unsupported) - min_dram_speed_mts = dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz * 16; + context->bw_ctx.dml.soc.dram_clock_change_latency_us = + dc->clk_mgr->bw_params->dummy_pstate_table[0].dummy_pstate_latency_us; - /* find largest table entry that is lower than dram speed, but lower than DPM0 still uses DPM0 */ - for (i = 3; i > 0; i--) - if (min_dram_speed_mts + min_dram_speed_mts_margin > dc->clk_mgr->bw_params->dummy_pstate_table[i].dram_speed_mts) - break; + if (context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] == + dm_dram_clock_change_unsupported) { + int min_dram_speed_mts_offset = dc->clk_mgr->bw_params->clk_table.num_entries - 1; + + min_dram_speed_mts = + dc->clk_mgr->bw_params->clk_table.entries[min_dram_speed_mts_offset].memclk_mhz * 16; + } + + if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) { + /* find largest table entry that is lower than dram speed, + * but lower than DPM0 still uses DPM0 + */ + for (dummy_latency_index = 3; dummy_latency_index > 0; dummy_latency_index--) + if (min_dram_speed_mts + min_dram_speed_mts_margin > + dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dram_speed_mts) + break; + } + + 
context->bw_ctx.dml.soc.dram_clock_change_latency_us = + dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us; - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[i].dummy_pstate_latency_us; context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.fclk_change_latency_us; context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us; context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us; @@ -1770,6 +1815,8 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; + if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) + dcn30_setup_mclk_switch_using_fw_based_vblank_stretch(dc, context); } static void dcn32_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts, -- cgit v1.2.3 From 493af96d61b1361a44d016057e0fc6e5d4c92fc7 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Thu, 19 May 2022 14:03:09 -0400 Subject: drm/amd/display: Update DCN32 and DCN321 SR latencies Update worst case SR latencies according to values measured by hardware team. 
Reviewed-by: Rodrigo Siqueira Acked-by: Alex Hung Signed-off-by: Alvin Lee Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 4 ++-- drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 01e272f0bb05..66453546e24f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -121,8 +121,8 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc = { }, }, .num_states = 1, - .sr_exit_time_us = 5.20, - .sr_enter_plus_exit_time_us = 9.60, + .sr_exit_time_us = 20.16, + .sr_enter_plus_exit_time_us = 27.13, .sr_exit_z8_time_us = 285.0, .sr_enter_plus_exit_z8_time_us = 320, .writeback_latency_us = 12.0, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c index 6e72336b7975..84b4b00f29cb 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c @@ -119,8 +119,8 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_21_soc = { }, }, .num_states = 1, - .sr_exit_time_us = 5.20, - .sr_enter_plus_exit_time_us = 9.60, + .sr_exit_time_us = 12.36, + .sr_enter_plus_exit_time_us = 16.72, .sr_exit_z8_time_us = 285.0, .sr_enter_plus_exit_z8_time_us = 320, .writeback_latency_us = 12.0, -- cgit v1.2.3 From ffccfdbab1056954fd400d2864a1c1b38915c143 Mon Sep 17 00:00:00 2001 From: Samson Tam Date: Thu, 26 May 2022 21:12:23 -0400 Subject: drm/amd/display: Add missing ODM 2:1 policy logic Phantom pipes must use the same configuration used in main pipes. This commit adds this check.
Reviewed-by: Rodrigo Siqueira Acked-by: Alex Hung Signed-off-by: Samson Tam Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 1 - drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c | 7 ++++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 621767e994bd..8224b9bf01d1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -1269,7 +1269,6 @@ static void get_pixel_clock_parameters( pixel_clk_params->requested_pix_clk_100hz /= 4; else if (optc2_is_two_pixels_per_containter(&stream->timing) || opp_cnt == 2) pixel_clk_params->requested_pix_clk_100hz /= 2; - else if (hws->funcs.is_dp_dig_pixel_rate_div_policy) { if (hws->funcs.is_dp_dig_pixel_rate_div_policy(pipe_ctx)) pixel_clk_params->requested_pix_clk_100hz /= 2; diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c index 1f845e9ac406..be2e3b9e971e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c @@ -1084,8 +1084,13 @@ unsigned int dcn32_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsign struct dc_stream_state *stream = pipe_ctx->stream; unsigned int odm_combine_factor = 0; struct dc *dc = pipe_ctx->stream->ctx->dc; - bool two_pix_per_container = optc2_is_two_pixels_per_containter(&stream->timing); + bool two_pix_per_container = false; + // For phantom pipes, use the same programming as the main pipes + if (pipe_ctx->stream->mall_stream_config.type == SUBVP_PHANTOM) { + stream = pipe_ctx->stream->mall_stream_config.paired_stream; + } + two_pix_per_container = optc2_is_two_pixels_per_containter(&stream->timing); odm_combine_factor = get_odm_config(pipe_ctx, NULL); if (is_dp_128b_132b_signal(pipe_ctx)) { -- cgit 
v1.2.3 From 3facca7489be2d42c6256350dae72c342a47d7e5 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Thu, 2 Jun 2022 16:01:33 -0400 Subject: drm/amd/display: Disable MPC split for DCN32/321 Due to CRB, no need to rely on MPC splitting to maximize use of DET anymore. Reviewed-by: Rodrigo Siqueira Acked-by: Alex Hung Signed-off-by: Alvin Lee Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 2 +- drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index bb1d880eee1e..2278181ad3d6 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -843,7 +843,7 @@ static const struct dc_debug_options debug_defaults_drv = { .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = false, - .pipe_split_policy = MPC_SPLIT_DYNAMIC, + .pipe_split_policy = MPC_SPLIT_AVOID, // Due to CRB, no need to MPC split anymore .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c index 73d2a653a279..ea5160d0c92e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c @@ -844,7 +844,7 @@ static const struct dc_debug_options debug_defaults_drv = { .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = false, - .pipe_split_policy = MPC_SPLIT_DYNAMIC, + .pipe_split_policy = MPC_SPLIT_AVOID, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, -- cgit v1.2.3 From b1babe8623f91a71a5acc34eb323c718e0413b74 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Fri, 3 Jun 2022 16:39:48 -0400 Subject: 
drm/amd/display: Add debug option for idle optimizations on cursor updates For optimizations and debug purposes we added an option to exit idle operations on cursor updates. Acked-by: Alex Hung Signed-off-by: Alvin Lee Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index 2278181ad3d6..9a26d24b579f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -868,6 +868,7 @@ static const struct dc_debug_options debug_defaults_drv = { }, .use_max_lb = true, .force_disable_subvp = true, + .exit_idle_opt_for_cursor_updates = true, .enable_single_display_2to1_odm_policy = true, .enable_dp_dig_pixel_rate_div_policy = 1, }; diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c index ea5160d0c92e..8157e40d2c7e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c @@ -869,6 +869,7 @@ static const struct dc_debug_options debug_defaults_drv = { }, .use_max_lb = true, .force_disable_subvp = true, + .exit_idle_opt_for_cursor_updates = true, .enable_single_display_2to1_odm_policy = true, .enable_dp_dig_pixel_rate_div_policy = 1, }; -- cgit v1.2.3 From c13423c63f73ee2f9807fdb2a3a7e647421a8114 Mon Sep 17 00:00:00 2001 From: Leo Li Date: Thu, 4 Nov 2021 19:41:55 -0400 Subject: drm/amd/display: Copy crc_skip_count when duplicating CRTC state [Why] crc_skip_count is used to track how many frames to skip to allow the OTG CRC engine to "warm up" before it outputs correct CRC values. Experimentally, this seems to be 2 frames. 
When duplicating CRTC states, this value was not copied to the duplicated state. Therefore, when this state is committed, we will needlessly wait 2 frames before outputting CRC values, even if the CRC engine is already warmed up. [How] Copy the crc_skip_count as part of dm_crtc_duplicate_state. Acked-by: Alex Hung Signed-off-by: Leo Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index 828189cb3441..594fe8a4d02b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -243,6 +243,7 @@ static struct drm_crtc_state *dm_crtc_duplicate_state(struct drm_crtc *crtc) state->freesync_config = cur->freesync_config; state->cm_has_degamma = cur->cm_has_degamma; state->cm_is_degamma_srgb = cur->cm_is_degamma_srgb; + state->crc_skip_count = cur->crc_skip_count; state->mpo_requested = cur->mpo_requested; /* TODO Duplicate dc_stream after objects are stream object is flattened */ -- cgit v1.2.3 From d58715704c5c7d82d7194540780fb335ab337da0 Mon Sep 17 00:00:00 2001 From: Anthony Koo Date: Sun, 17 Jul 2022 11:41:44 -0400 Subject: drm/amd/display: 3.2.196 This version brings along following fixes: - Copy crc_skip_count when duplicating CRTC state - Add debug option for idle optimizations on cursor updates - Disable MPC split for DCN32/321 - Add missing ODM 2:1 policy logic - Update DCN32 and DCN321 SR latencies - Add reinstate dram in the FPO logic - Add dc_ctx to link_enc_create() parameters - Cache cursor when cursor exceeds 64x64 - Add support for manual DMUB FAMS trigger - Fix dpstreamclk programming - Add missing AUDIO_DTO_SEL reg field - Add OTG/ODM functions - Use correct clock source constructor for DCN314 - Use correct DTO_SRC_SEL for 128b/132b encoding - Add pixel rate div
calcs and programming - Remove FPU flags from DCN30 Makefile - Create patch bounding box function for isolate FPU - Move mclk calculation function to DML - Remove FPU operations from dcn201 resources - Fallback to SW cursor if SubVP + cursor too big - Drop unnecessary FPU flags on dcn302 files - Reboot while unplug hdcp enabled dp from mst hub - Reset pipe count when iterating for DET override - Calculate MALL cache lines based on Mblks required - Fix two MPO videos in single display ODM combine mode - Guard against zero memory channels - Updates SubVP + SubVP DRR cases updates - Fix OPTC function pointers for DCN314 - Add enable/disable FIFO callbacks to stream setup - Avoid MPC infinite loop Acked-by: Alex Hung Signed-off-by: Anthony Koo Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index d05bbe193bfa..8e1e40083ec8 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -47,7 +47,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.195" +#define DC_VER "3.2.196" #define MAX_SURFACES 3 #define MAX_PLANES 6 -- cgit v1.2.3 From 1b54a0121dba12af268fb75c413feabdb9f573d4 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Fri, 22 Jul 2022 13:56:17 -0400 Subject: drm/amd/display: Reduce stack size in the mode support function When we use the allmodconfig option we see the following error: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c: In function 'dml32_ModeSupportAndSystemConfigurationFull': drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c:3799:1: error: the frame size of 2464 bytes is larger than 2048 bytes [-Werror=frame-larger-than=] 3799 | } // ModeSupportAndSystemConfigurationFull This commit fixes this issue by moving part of the mode 
support operation from ModeSupportAndSystemConfigurationFull to a dedicated function. Cc: Harry Wentland Cc: Alex Deucher Cc: Aurabindo Pillai Cc: Stephen Rothwell Tested-by: Stephen Rothwell Acked-by: Alex Deucher Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../amd/display/dc/dml/dcn32/display_mode_vba_32.c | 132 +++++++++++---------- 1 file changed, 70 insertions(+), 62 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index db3e43499a26..890612db08dc 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -1654,6 +1654,75 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman #endif } +static void mode_support_configuration(struct vba_vars_st *v, + struct display_mode_lib *mode_lib) +{ + int i, j; + + for (i = v->soc.num_states - 1; i >= 0; i--) { + for (j = 0; j < 2; j++) { + if (mode_lib->vba.ScaleRatioAndTapsSupport == true + && mode_lib->vba.SourceFormatPixelAndScanSupport == true + && mode_lib->vba.ViewportSizeSupport[i][j] == true + && !mode_lib->vba.LinkRateDoesNotMatchDPVersion + && !mode_lib->vba.LinkRateForMultistreamNotIndicated + && !mode_lib->vba.BPPForMultistreamNotIndicated + && !mode_lib->vba.MultistreamWithHDMIOreDP + && !mode_lib->vba.ExceededMultistreamSlots[i] + && !mode_lib->vba.MSOOrODMSplitWithNonDPLink + && !mode_lib->vba.NotEnoughLanesForMSO + && mode_lib->vba.LinkCapacitySupport[i] == true && !mode_lib->vba.P2IWith420 + && !mode_lib->vba.DSCOnlyIfNecessaryWithBPP + && !mode_lib->vba.DSC422NativeNotSupported + && !mode_lib->vba.MPCCombineMethodIncompatible + && mode_lib->vba.ODMCombine2To1SupportCheckOK[i] == true + && mode_lib->vba.ODMCombine4To1SupportCheckOK[i] == true + && mode_lib->vba.NotEnoughDSCUnits[i] == false + && !mode_lib->vba.NotEnoughDSCSlices[i] + && 
!mode_lib->vba.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe + && !mode_lib->vba.InvalidCombinationOfMALLUseForPStateAndStaticScreen + && mode_lib->vba.DSCCLKRequiredMoreThanSupported[i] == false + && mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] + && mode_lib->vba.DTBCLKRequiredMoreThanSupported[i] == false + && !mode_lib->vba.InvalidCombinationOfMALLUseForPState + && !mode_lib->vba.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified + && mode_lib->vba.ROBSupport[i][j] == true + && mode_lib->vba.DISPCLK_DPPCLK_Support[i][j] == true + && mode_lib->vba.TotalAvailablePipesSupport[i][j] == true + && mode_lib->vba.NumberOfOTGSupport == true + && mode_lib->vba.NumberOfHDMIFRLSupport == true + && mode_lib->vba.EnoughWritebackUnits == true + && mode_lib->vba.WritebackLatencySupport == true + && mode_lib->vba.WritebackScaleRatioAndTapsSupport == true + && mode_lib->vba.CursorSupport == true && mode_lib->vba.PitchSupport == true + && mode_lib->vba.ViewportExceedsSurface == false + && mode_lib->vba.PrefetchSupported[i][j] == true + && mode_lib->vba.VActiveBandwithSupport[i][j] == true + && mode_lib->vba.DynamicMetadataSupported[i][j] == true + && mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][j] == true + && mode_lib->vba.VRatioInPrefetchSupported[i][j] == true + && mode_lib->vba.PTEBufferSizeNotExceeded[i][j] == true + && mode_lib->vba.DCCMetaBufferSizeNotExceeded[i][j] == true + && mode_lib->vba.NonsupportedDSCInputBPC == false + && !mode_lib->vba.ExceededMALLSize + && ((mode_lib->vba.HostVMEnable == false + && !mode_lib->vba.ImmediateFlipRequiredFinal) + || mode_lib->vba.ImmediateFlipSupportedForState[i][j]) + && (!mode_lib->vba.DRAMClockChangeRequirementFinal + || i == v->soc.num_states - 1 + || mode_lib->vba.DRAMClockChangeSupport[i][j] != dm_dram_clock_change_unsupported) + && (!mode_lib->vba.FCLKChangeRequirementFinal || i == v->soc.num_states - 1 + || mode_lib->vba.FCLKChangeSupport[i][j] != dm_fclock_change_unsupported) + && 
(!mode_lib->vba.USRRetrainingRequiredFinal + || mode_lib->vba.USRRetrainingSupport[i][j])) { + mode_lib->vba.ModeSupport[i][j] = true; + } else { + mode_lib->vba.ModeSupport[i][j] = false; + } + } + } +} + void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) { struct vba_vars_st *v = &mode_lib->vba; @@ -3632,68 +3701,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } /*Mode Support, Voltage State and SOC Configuration*/ - for (i = v->soc.num_states - 1; i >= 0; i--) { - for (j = 0; j < 2; j++) { - if (mode_lib->vba.ScaleRatioAndTapsSupport == true - && mode_lib->vba.SourceFormatPixelAndScanSupport == true - && mode_lib->vba.ViewportSizeSupport[i][j] == true - && !mode_lib->vba.LinkRateDoesNotMatchDPVersion - && !mode_lib->vba.LinkRateForMultistreamNotIndicated - && !mode_lib->vba.BPPForMultistreamNotIndicated - && !mode_lib->vba.MultistreamWithHDMIOreDP - && !mode_lib->vba.ExceededMultistreamSlots[i] - && !mode_lib->vba.MSOOrODMSplitWithNonDPLink - && !mode_lib->vba.NotEnoughLanesForMSO - && mode_lib->vba.LinkCapacitySupport[i] == true && !mode_lib->vba.P2IWith420 - && !mode_lib->vba.DSCOnlyIfNecessaryWithBPP - && !mode_lib->vba.DSC422NativeNotSupported - && !mode_lib->vba.MPCCombineMethodIncompatible - && mode_lib->vba.ODMCombine2To1SupportCheckOK[i] == true - && mode_lib->vba.ODMCombine4To1SupportCheckOK[i] == true - && mode_lib->vba.NotEnoughDSCUnits[i] == false - && !mode_lib->vba.NotEnoughDSCSlices[i] - && !mode_lib->vba.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe - && !mode_lib->vba.InvalidCombinationOfMALLUseForPStateAndStaticScreen - && mode_lib->vba.DSCCLKRequiredMoreThanSupported[i] == false - && mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] - && mode_lib->vba.DTBCLKRequiredMoreThanSupported[i] == false - && !mode_lib->vba.InvalidCombinationOfMALLUseForPState - && !mode_lib->vba.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified - && 
mode_lib->vba.ROBSupport[i][j] == true - && mode_lib->vba.DISPCLK_DPPCLK_Support[i][j] == true - && mode_lib->vba.TotalAvailablePipesSupport[i][j] == true - && mode_lib->vba.NumberOfOTGSupport == true - && mode_lib->vba.NumberOfHDMIFRLSupport == true - && mode_lib->vba.EnoughWritebackUnits == true - && mode_lib->vba.WritebackLatencySupport == true - && mode_lib->vba.WritebackScaleRatioAndTapsSupport == true - && mode_lib->vba.CursorSupport == true && mode_lib->vba.PitchSupport == true - && mode_lib->vba.ViewportExceedsSurface == false - && mode_lib->vba.PrefetchSupported[i][j] == true - && mode_lib->vba.VActiveBandwithSupport[i][j] == true - && mode_lib->vba.DynamicMetadataSupported[i][j] == true - && mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][j] == true - && mode_lib->vba.VRatioInPrefetchSupported[i][j] == true - && mode_lib->vba.PTEBufferSizeNotExceeded[i][j] == true - && mode_lib->vba.DCCMetaBufferSizeNotExceeded[i][j] == true - && mode_lib->vba.NonsupportedDSCInputBPC == false - && !mode_lib->vba.ExceededMALLSize - && ((mode_lib->vba.HostVMEnable == false - && !mode_lib->vba.ImmediateFlipRequiredFinal) - || mode_lib->vba.ImmediateFlipSupportedForState[i][j]) - && (!mode_lib->vba.DRAMClockChangeRequirementFinal - || i == v->soc.num_states - 1 - || mode_lib->vba.DRAMClockChangeSupport[i][j] != dm_dram_clock_change_unsupported) - && (!mode_lib->vba.FCLKChangeRequirementFinal || i == v->soc.num_states - 1 - || mode_lib->vba.FCLKChangeSupport[i][j] != dm_fclock_change_unsupported) - && (!mode_lib->vba.USRRetrainingRequiredFinal - || mode_lib->vba.USRRetrainingSupport[i][j])) { - mode_lib->vba.ModeSupport[i][j] = true; - } else { - mode_lib->vba.ModeSupport[i][j] = false; - } - } - } + mode_support_configuration(v, mode_lib); MaximumMPCCombine = 0; -- cgit v1.2.3