diff options
Diffstat (limited to 'drivers/gpu/drm/radeon')
53 files changed, 4594 insertions, 1701 deletions
diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index cf8b4bc3e73d..2139fe893ec5 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile @@ -70,7 +70,8 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \ r200.o radeon_legacy_tv.o r600_cs.o r600_blit.o r600_blit_shaders.o \ r600_blit_kms.o radeon_pm.o atombios_dp.o r600_audio.o r600_hdmi.o \ evergreen.o evergreen_cs.o evergreen_blit_shaders.o evergreen_blit_kms.o \ - radeon_trace_points.o ni.o cayman_blit_shaders.o atombios_encoders.o + radeon_trace_points.o ni.o cayman_blit_shaders.o atombios_encoders.o \ + radeon_semaphore.o radeon_sa.o radeon-$(CONFIG_COMPAT) += radeon_ioc32.o radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o @@ -78,4 +79,4 @@ radeon-$(CONFIG_ACPI) += radeon_acpi.o obj-$(CONFIG_DRM_RADEON)+= radeon.o -CFLAGS_radeon_trace_points.o := -I$(src)
\ No newline at end of file +CFLAGS_radeon_trace_points.o := -I$(src) diff --git a/drivers/gpu/drm/radeon/atom.c b/drivers/gpu/drm/radeon/atom.c index 14cc88aaf3a7..d1bd239cd9e9 100644 --- a/drivers/gpu/drm/radeon/atom.c +++ b/drivers/gpu/drm/radeon/atom.c @@ -665,6 +665,8 @@ static void atom_op_delay(atom_exec_context *ctx, int *ptr, int arg) SDEBUG(" count: %d\n", count); if (arg == ATOM_UNIT_MICROSEC) udelay(count); + else if (!drm_can_sleep()) + mdelay(count); else msleep(count); } diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index 2b97262e3ab1..0fda830ef806 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -554,7 +554,7 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc, if (encoder->crtc == crtc) { radeon_encoder = to_radeon_encoder(encoder); connector = radeon_get_connector_for_encoder(encoder); - if (connector) + if (connector && connector->display_info.bpc) bpc = connector->display_info.bpc; encoder_mode = atombios_get_encoder_mode(encoder); if ((radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT | ATOM_DEVICE_DFP_SUPPORT)) || @@ -1184,7 +1184,7 @@ static int dce4_crtc_do_set_base(struct drm_crtc *crtc, WREG32(EVERGREEN_GRPH_X_END + radeon_crtc->crtc_offset, target_fb->width); WREG32(EVERGREEN_GRPH_Y_END + radeon_crtc->crtc_offset, target_fb->height); - fb_pitch_pixels = target_fb->pitch / (target_fb->bits_per_pixel / 8); + fb_pitch_pixels = target_fb->pitches[0] / (target_fb->bits_per_pixel / 8); WREG32(EVERGREEN_GRPH_PITCH + radeon_crtc->crtc_offset, fb_pitch_pixels); WREG32(EVERGREEN_GRPH_ENABLE + radeon_crtc->crtc_offset, 1); @@ -1353,7 +1353,7 @@ static int avivo_crtc_do_set_base(struct drm_crtc *crtc, WREG32(AVIVO_D1GRPH_X_END + radeon_crtc->crtc_offset, target_fb->width); WREG32(AVIVO_D1GRPH_Y_END + radeon_crtc->crtc_offset, target_fb->height); - fb_pitch_pixels = target_fb->pitch / (target_fb->bits_per_pixel / 8); + fb_pitch_pixels = target_fb->pitches[0] / (target_fb->bits_per_pixel / 8); WREG32(AVIVO_D1GRPH_PITCH + radeon_crtc->crtc_offset, fb_pitch_pixels); WREG32(AVIVO_D1GRPH_ENABLE + radeon_crtc->crtc_offset, 1); diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c index 39c04c1b8472..f1f06ca9f1f5 100644 --- a/drivers/gpu/drm/radeon/atombios_encoders.c +++ b/drivers/gpu/drm/radeon/atombios_encoders.c @@ -409,8 +409,6 @@ int atombios_get_encoder_mode(struct drm_encoder *encoder) { struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); - struct drm_device *dev = encoder->dev; - struct radeon_device *rdev = dev->dev_private; struct drm_connector *connector; struct radeon_connector *radeon_connector; struct radeon_connector_atom_dig *dig_connector; @@ -434,13 +432,10 @@ atombios_get_encoder_mode(struct drm_encoder *encoder) switch (connector->connector_type) { case DRM_MODE_CONNECTOR_DVII: case DRM_MODE_CONNECTOR_HDMIB: /* HDMI-B is basically DL-DVI; analog works fine */ - if (drm_detect_monitor_audio(radeon_connector->edid) && radeon_audio) { - /* fix me */ - if (ASIC_IS_DCE4(rdev)) - return ATOM_ENCODER_MODE_DVI; - else - return ATOM_ENCODER_MODE_HDMI; - } else if (radeon_connector->use_digital) + if (drm_detect_monitor_audio(radeon_connector->edid) && + radeon_audio) + return ATOM_ENCODER_MODE_HDMI; + else if (radeon_connector->use_digital) return ATOM_ENCODER_MODE_DVI; else return ATOM_ENCODER_MODE_CRT; @@ -448,13 +443,10 @@ atombios_get_encoder_mode(struct drm_encoder *encoder) case DRM_MODE_CONNECTOR_DVID: case DRM_MODE_CONNECTOR_HDMIA: default: - if (drm_detect_monitor_audio(radeon_connector->edid) && radeon_audio) { - /* fix me */ - if (ASIC_IS_DCE4(rdev)) - return ATOM_ENCODER_MODE_DVI; - else - return ATOM_ENCODER_MODE_HDMI; - } else + if (drm_detect_monitor_audio(radeon_connector->edid) && + radeon_audio) + return ATOM_ENCODER_MODE_HDMI; + else return ATOM_ENCODER_MODE_DVI; break; case DRM_MODE_CONNECTOR_LVDS: @@ -465,13 +457,10 @@ atombios_get_encoder_mode(struct drm_encoder *encoder) if ((dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) || (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_eDP)) return ATOM_ENCODER_MODE_DP; - else if (drm_detect_monitor_audio(radeon_connector->edid) && radeon_audio) { - /* fix me */ - if (ASIC_IS_DCE4(rdev)) - return ATOM_ENCODER_MODE_DVI; - else - return ATOM_ENCODER_MODE_HDMI; - } else + else if (drm_detect_monitor_audio(radeon_connector->edid) && + radeon_audio) + return ATOM_ENCODER_MODE_HDMI; + else return ATOM_ENCODER_MODE_DVI; break; case DRM_MODE_CONNECTOR_eDP: diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 92c9628c572d..636660fca8c2 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -40,6 +40,8 @@ static void evergreen_gpu_init(struct radeon_device *rdev); void evergreen_fini(struct radeon_device *rdev); void evergreen_pcie_gen2_enable(struct radeon_device *rdev); +extern void cayman_cp_int_cntl_setup(struct radeon_device *rdev, + int ring, u32 cp_int_cntl); void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev) { @@ -1311,18 +1313,20 @@ void evergreen_mc_program(struct radeon_device *rdev) */ void evergreen_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { + struct radeon_ring *ring = &rdev->ring[ib->fence->ring]; + /* set to DX10/11 mode */ - radeon_ring_write(rdev, PACKET3(PACKET3_MODE_CONTROL, 0)); - radeon_ring_write(rdev, 1); + radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0)); + radeon_ring_write(ring, 1); /* FIXME: implement */ - radeon_ring_write(rdev, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); - radeon_ring_write(rdev, + radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); + radeon_ring_write(ring, #ifdef __BIG_ENDIAN (2 << 0) | #endif (ib->gpu_addr & 0xFFFFFFFC)); - radeon_ring_write(rdev, upper_32_bits(ib->gpu_addr) & 0xFF); - radeon_ring_write(rdev, ib->length_dw); + radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFF); + radeon_ring_write(ring, ib->length_dw); } @@ -1360,71 +1364,73 @@ static int evergreen_cp_load_microcode(struct radeon_device *rdev) static int evergreen_cp_start(struct radeon_device *rdev) { + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; int r, i; uint32_t cp_me; - r = radeon_ring_lock(rdev, 7); + r = radeon_ring_lock(rdev, ring, 7); if (r) { DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r); return r; } - radeon_ring_write(rdev, PACKET3(PACKET3_ME_INITIALIZE, 5)); - radeon_ring_write(rdev, 0x1); - radeon_ring_write(rdev, 0x0); - radeon_ring_write(rdev, rdev->config.evergreen.max_hw_contexts - 1); - radeon_ring_write(rdev, PACKET3_ME_INITIALIZE_DEVICE_ID(1)); - radeon_ring_write(rdev, 0); - radeon_ring_write(rdev, 0); - radeon_ring_unlock_commit(rdev); + radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5)); + radeon_ring_write(ring, 0x1); + radeon_ring_write(ring, 0x0); + radeon_ring_write(ring, rdev->config.evergreen.max_hw_contexts - 1); + radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1)); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, 0); + radeon_ring_unlock_commit(rdev, ring); cp_me = 0xff; WREG32(CP_ME_CNTL, cp_me); - r = radeon_ring_lock(rdev, evergreen_default_size + 19); + r = radeon_ring_lock(rdev, ring, evergreen_default_size + 19); if (r) { DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r); return r; } /* setup clear context state */ - radeon_ring_write(rdev, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - radeon_ring_write(rdev, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); + radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); + radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); for (i = 0; i < evergreen_default_size; i++) - radeon_ring_write(rdev, evergreen_default_state[i]); + radeon_ring_write(ring, evergreen_default_state[i]); - radeon_ring_write(rdev, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - radeon_ring_write(rdev, PACKET3_PREAMBLE_END_CLEAR_STATE); + radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); + radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); /* set clear context state */ - radeon_ring_write(rdev, PACKET3(PACKET3_CLEAR_STATE, 0)); - radeon_ring_write(rdev, 0); + radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); + radeon_ring_write(ring, 0); /* SQ_VTX_BASE_VTX_LOC */ - radeon_ring_write(rdev, 0xc0026f00); - radeon_ring_write(rdev, 0x00000000); - radeon_ring_write(rdev, 0x00000000); - radeon_ring_write(rdev, 0x00000000); + radeon_ring_write(ring, 0xc0026f00); + radeon_ring_write(ring, 0x00000000); + radeon_ring_write(ring, 0x00000000); + radeon_ring_write(ring, 0x00000000); /* Clear consts */ - radeon_ring_write(rdev, 0xc0036f00); - radeon_ring_write(rdev, 0x00000bc4); - radeon_ring_write(rdev, 0xffffffff); - radeon_ring_write(rdev, 0xffffffff); - radeon_ring_write(rdev, 0xffffffff); + radeon_ring_write(ring, 0xc0036f00); + radeon_ring_write(ring, 0x00000bc4); + radeon_ring_write(ring, 0xffffffff); + radeon_ring_write(ring, 0xffffffff); + radeon_ring_write(ring, 0xffffffff); - radeon_ring_write(rdev, 0xc0026900); - radeon_ring_write(rdev, 0x00000316); - radeon_ring_write(rdev, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */ - radeon_ring_write(rdev, 0x00000010); /* */ + radeon_ring_write(ring, 0xc0026900); + radeon_ring_write(ring, 0x00000316); + radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */ + radeon_ring_write(ring, 0x00000010); /* */ - radeon_ring_unlock_commit(rdev); + radeon_ring_unlock_commit(rdev, ring); return 0; } int evergreen_cp_resume(struct radeon_device *rdev) { + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; u32 tmp; u32 rb_bufsz; int r; @@ -1442,13 +1448,13 @@ int evergreen_cp_resume(struct radeon_device *rdev) RREG32(GRBM_SOFT_RESET); /* Set ring buffer size */ - rb_bufsz = drm_order(rdev->cp.ring_size / 8); + rb_bufsz = drm_order(ring->ring_size / 8); tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz; #ifdef __BIG_ENDIAN tmp |= BUF_SWAP_32BIT; #endif WREG32(CP_RB_CNTL, tmp); - WREG32(CP_SEM_WAIT_TIMER, 0x4); + WREG32(CP_SEM_WAIT_TIMER, 0x0); /* Set the write pointer delay */ WREG32(CP_RB_WPTR_DELAY, 0); @@ -1456,8 +1462,8 @@ int evergreen_cp_resume(struct radeon_device *rdev) /* Initialize the ring buffer's read and write pointers */ WREG32(CP_RB_CNTL, tmp | RB_RPTR_WR_ENA); WREG32(CP_RB_RPTR_WR, 0); - rdev->cp.wptr = 0; - WREG32(CP_RB_WPTR, rdev->cp.wptr); + ring->wptr = 0; + WREG32(CP_RB_WPTR, ring->wptr); /* set the wb address wether it's enabled or not */ WREG32(CP_RB_RPTR_ADDR, @@ -1475,16 +1481,16 @@ int evergreen_cp_resume(struct radeon_device *rdev) mdelay(1); WREG32(CP_RB_CNTL, tmp); - WREG32(CP_RB_BASE, rdev->cp.gpu_addr >> 8); + WREG32(CP_RB_BASE, ring->gpu_addr >> 8); WREG32(CP_DEBUG, (1 << 27) | (1 << 28)); - rdev->cp.rptr = RREG32(CP_RB_RPTR); + ring->rptr = RREG32(CP_RB_RPTR); evergreen_cp_start(rdev); - rdev->cp.ready = true; - r = radeon_ring_test(rdev); + ring->ready = true; + r = radeon_ring_test(rdev, ring); if (r) { - rdev->cp.ready = false; + ring->ready = false; return r; } return 0; @@ -2353,7 +2359,7 @@ int evergreen_mc_init(struct radeon_device *rdev) return 0; } -bool evergreen_gpu_is_lockup(struct radeon_device *rdev) +bool evergreen_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) { u32 srbm_status; u32 grbm_status; @@ -2366,19 +2372,19 @@ bool evergreen_gpu_is_lockup(struct radeon_device *rdev) grbm_status_se0 = RREG32(GRBM_STATUS_SE0); grbm_status_se1 = RREG32(GRBM_STATUS_SE1); if (!(grbm_status & GUI_ACTIVE)) { - r100_gpu_lockup_update(lockup, &rdev->cp); + r100_gpu_lockup_update(lockup, ring); return false; } /* force CP activities */ - r = radeon_ring_lock(rdev, 2); + r = radeon_ring_lock(rdev, ring, 2); if (!r) { /* PACKET2 NOP */ - radeon_ring_write(rdev, 0x80000000); - radeon_ring_write(rdev, 0x80000000); - radeon_ring_unlock_commit(rdev); + radeon_ring_write(ring, 0x80000000); + radeon_ring_write(ring, 0x80000000); + radeon_ring_unlock_commit(rdev, ring); } - rdev->cp.rptr = RREG32(CP_RB_RPTR); - return r100_gpu_cp_is_lockup(rdev, lockup, &rdev->cp); + ring->rptr = RREG32(CP_RB_RPTR); + return r100_gpu_cp_is_lockup(rdev, lockup, ring); } static int evergreen_gpu_soft_reset(struct radeon_device *rdev) @@ -2470,7 +2476,13 @@ void evergreen_disable_interrupt_state(struct radeon_device *rdev) { u32 tmp; - WREG32(CP_INT_CNTL, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); + if (rdev->family >= CHIP_CAYMAN) { + cayman_cp_int_cntl_setup(rdev, 0, + CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); + cayman_cp_int_cntl_setup(rdev, 1, 0); + cayman_cp_int_cntl_setup(rdev, 2, 0); + } else + WREG32(CP_INT_CNTL, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); WREG32(GRBM_INT_CNTL, 0); WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0); WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0); @@ -2515,6 +2527,7 @@ void evergreen_disable_interrupt_state(struct radeon_device *rdev) int evergreen_irq_set(struct radeon_device *rdev) { u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE; + u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0; u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0; u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6; u32 grbm_int_cntl = 0; @@ -2539,11 +2552,28 @@ int evergreen_irq_set(struct radeon_device *rdev) hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN; hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN; - if (rdev->irq.sw_int) { - DRM_DEBUG("evergreen_irq_set: sw int\n"); - cp_int_cntl |= RB_INT_ENABLE; - cp_int_cntl |= TIME_STAMP_INT_ENABLE; + if (rdev->family >= CHIP_CAYMAN) { + /* enable CP interrupts on all rings */ + if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) { + DRM_DEBUG("evergreen_irq_set: sw int gfx\n"); + cp_int_cntl |= TIME_STAMP_INT_ENABLE; + } + if (rdev->irq.sw_int[CAYMAN_RING_TYPE_CP1_INDEX]) { + DRM_DEBUG("evergreen_irq_set: sw int cp1\n"); + cp_int_cntl1 |= TIME_STAMP_INT_ENABLE; + } + if (rdev->irq.sw_int[CAYMAN_RING_TYPE_CP2_INDEX]) { + DRM_DEBUG("evergreen_irq_set: sw int cp2\n"); + cp_int_cntl2 |= TIME_STAMP_INT_ENABLE; + } + } else { + if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) { + DRM_DEBUG("evergreen_irq_set: sw int gfx\n"); + cp_int_cntl |= RB_INT_ENABLE; + cp_int_cntl |= TIME_STAMP_INT_ENABLE; + } } + if (rdev->irq.crtc_vblank_int[0] || rdev->irq.pflip[0]) { DRM_DEBUG("evergreen_irq_set: vblank 0\n"); @@ -2603,7 +2633,12 @@ int evergreen_irq_set(struct radeon_device *rdev) grbm_int_cntl |= GUI_IDLE_INT_ENABLE; } - WREG32(CP_INT_CNTL, cp_int_cntl); + if (rdev->family >= CHIP_CAYMAN) { + cayman_cp_int_cntl_setup(rdev, 0, cp_int_cntl); + cayman_cp_int_cntl_setup(rdev, 1, cp_int_cntl1); + cayman_cp_int_cntl_setup(rdev, 2, cp_int_cntl2); + } else + WREG32(CP_INT_CNTL, cp_int_cntl); WREG32(GRBM_INT_CNTL, grbm_int_cntl); WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1); @@ -3018,11 +3053,24 @@ restart_ih: case 177: /* CP_INT in IB1 */ case 178: /* CP_INT in IB2 */ DRM_DEBUG("IH: CP int: 0x%08x\n", src_data); - radeon_fence_process(rdev); + radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX); break; case 181: /* CP EOP event */ DRM_DEBUG("IH: CP EOP\n"); - radeon_fence_process(rdev); + if (rdev->family >= CHIP_CAYMAN) { + switch (src_data) { + case 0: + radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX); + break; + case 1: + radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX); + break; + case 2: + radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX); + break; + } + } else + radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX); break; case 233: /* GUI IDLE */ DRM_DEBUG("IH: GUI idle\n"); @@ -3052,6 +3100,7 @@ restart_ih: static int evergreen_startup(struct radeon_device *rdev) { + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; int r; /* enable pcie gen2 link */ @@ -3106,6 +3155,12 @@ static int evergreen_startup(struct radeon_device *rdev) if (r) return r; + r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX); + if (r) { + dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); + return r; + } + /* Enable IRQ */ r = r600_irq_init(rdev); if (r) { @@ -3115,7 +3170,9 @@ static int evergreen_startup(struct radeon_device *rdev) } evergreen_irq_set(rdev); - r = radeon_ring_init(rdev, rdev->cp.ring_size); + r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, + R600_CP_RB_RPTR, R600_CP_RB_WPTR, + 0, 0xfffff, RADEON_CP_PACKET2); if (r) return r; r = evergreen_cp_load_microcode(rdev); @@ -3125,6 +3182,22 @@ static int evergreen_startup(struct radeon_device *rdev) if (r) return r; + r = radeon_ib_pool_start(rdev); + if (r) + return r; + + r = r600_ib_test(rdev, RADEON_RING_TYPE_GFX_INDEX); + if (r) { + DRM_ERROR("radeon: failed testing IB (%d).\n", r); + rdev->accel_working = false; + } + + r = r600_audio_init(rdev); + if (r) { + DRM_ERROR("radeon: audio init failed\n"); + return r; + } + return 0; } @@ -3144,31 +3217,30 @@ int evergreen_resume(struct radeon_device *rdev) /* post card */ atom_asic_init(rdev->mode_info.atom_context); + rdev->accel_working = true; r = evergreen_startup(rdev); if (r) { DRM_ERROR("evergreen startup failed on resume\n"); return r; } - r = r600_ib_test(rdev); - if (r) { - DRM_ERROR("radeon: failed testing IB (%d).\n", r); - return r; - } - return r; } int evergreen_suspend(struct radeon_device *rdev) { + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; + + r600_audio_fini(rdev); /* FIXME: we should wait for ring to be empty */ + radeon_ib_pool_suspend(rdev); + r600_blit_suspend(rdev); r700_cp_stop(rdev); - rdev->cp.ready = false; + ring->ready = false; evergreen_irq_suspend(rdev); radeon_wb_disable(rdev); evergreen_pcie_gart_disable(rdev); - r600_blit_suspend(rdev); return 0; } @@ -3243,8 +3315,8 @@ int evergreen_init(struct radeon_device *rdev) if (r) return r; - rdev->cp.ring_obj = NULL; - r600_ring_init(rdev, 1024 * 1024); + rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL; + r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024); rdev->ih.ring_obj = NULL; r600_ih_ring_init(rdev, 64 * 1024); @@ -3253,29 +3325,24 @@ int evergreen_init(struct radeon_device *rdev) if (r) return r; + r = radeon_ib_pool_init(rdev); rdev->accel_working = true; + if (r) { + dev_err(rdev->dev, "IB initialization failed (%d).\n", r); + rdev->accel_working = false; + } + r = evergreen_startup(rdev); if (r) { dev_err(rdev->dev, "disabling GPU acceleration\n"); r700_cp_fini(rdev); r600_irq_fini(rdev); radeon_wb_fini(rdev); + r100_ib_fini(rdev); radeon_irq_kms_fini(rdev); evergreen_pcie_gart_fini(rdev); rdev->accel_working = false; } - if (rdev->accel_working) { - r = radeon_ib_pool_init(rdev); - if (r) { - DRM_ERROR("radeon: failed initializing IB pool (%d).\n", r); - rdev->accel_working = false; - } - r = r600_ib_test(rdev); - if (r) { - DRM_ERROR("radeon: failed testing IB (%d).\n", r); - rdev->accel_working = false; - } - } /* Don't start up if the MC ucode is missing on BTC parts. * The default clocks and voltages before the MC ucode @@ -3293,15 +3360,17 @@ int evergreen_init(struct radeon_device *rdev) void evergreen_fini(struct radeon_device *rdev) { + r600_audio_fini(rdev); r600_blit_fini(rdev); r700_cp_fini(rdev); r600_irq_fini(rdev); radeon_wb_fini(rdev); - radeon_ib_pool_fini(rdev); + r100_ib_fini(rdev); radeon_irq_kms_fini(rdev); evergreen_pcie_gart_fini(rdev); r600_vram_scratch_fini(rdev); radeon_gem_fini(rdev); + radeon_semaphore_driver_fini(rdev); radeon_fence_driver_fini(rdev); radeon_agp_fini(rdev); radeon_bo_fini(rdev); diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c b/drivers/gpu/drm/radeon/evergreen_blit_kms.c index 914e5af84163..2379849515c7 100644 --- a/drivers/gpu/drm/radeon/evergreen_blit_kms.c +++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c @@ -49,6 +49,7 @@ static void set_render_target(struct radeon_device *rdev, int format, int w, int h, u64 gpu_addr) { + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; u32 cb_color_info; int pitch, slice; @@ -62,23 +63,23 @@ set_render_target(struct radeon_device *rdev, int format, pitch = (w / 8) - 1; slice = ((w * h) / 64) - 1; - radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 15)); - radeon_ring_write(rdev, (CB_COLOR0_BASE - PACKET3_SET_CONTEXT_REG_START) >> 2); - radeon_ring_write(rdev, gpu_addr >> 8); - radeon_ring_write(rdev, pitch); - radeon_ring_write(rdev, slice); - radeon_ring_write(rdev, 0); - radeon_ring_write(rdev, cb_color_info); - radeon_ring_write(rdev, 0); - radeon_ring_write(rdev, (w - 1) | ((h - 1) << 16)); - radeon_ring_write(rdev, 0); - radeon_ring_write(rdev, 0); - radeon_ring_write(rdev, 0); - radeon_ring_write(rdev, 0); - radeon_ring_write(rdev, 0); - radeon_ring_write(rdev, 0); - radeon_ring_write(rdev, 0); - radeon_ring_write(rdev, 0); + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 15)); + radeon_ring_write(ring, (CB_COLOR0_BASE - PACKET3_SET_CONTEXT_REG_START) >> 2); + radeon_ring_write(ring, gpu_addr >> 8); + radeon_ring_write(ring, pitch); + radeon_ring_write(ring, slice); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, cb_color_info); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, (w - 1) | ((h - 1) << 16)); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, 0); } /* emits 5dw */ @@ -87,6 +88,7 @@ cp_set_surface_sync(struct radeon_device *rdev, u32 sync_type, u32 size, u64 mc_addr) { + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; u32 cp_coher_size; if (size == 0xffffffff) @@ -99,39 +101,40 @@ cp_set_surface_sync(struct radeon_device *rdev, * to the RB directly. For IBs, the CP programs this as part of the * surface_sync packet. */ - radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1)); - radeon_ring_write(rdev, (0x85e8 - PACKET3_SET_CONFIG_REG_START) >> 2); - radeon_ring_write(rdev, 0); /* CP_COHER_CNTL2 */ + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + radeon_ring_write(ring, (0x85e8 - PACKET3_SET_CONFIG_REG_START) >> 2); + radeon_ring_write(ring, 0); /* CP_COHER_CNTL2 */ } - radeon_ring_write(rdev, PACKET3(PACKET3_SURFACE_SYNC, 3)); - radeon_ring_write(rdev, sync_type); - radeon_ring_write(rdev, cp_coher_size); - radeon_ring_write(rdev, mc_addr >> 8); - radeon_ring_write(rdev, 10); /* poll interval */ + radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3)); + radeon_ring_write(ring, sync_type); + radeon_ring_write(ring, cp_coher_size); + radeon_ring_write(ring, mc_addr >> 8); + radeon_ring_write(ring, 10); /* poll interval */ } /* emits 11dw + 1 surface sync = 16dw */ static void set_shaders(struct radeon_device *rdev) { + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; u64 gpu_addr; /* VS */ gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset; - radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 3)); - radeon_ring_write(rdev, (SQ_PGM_START_VS - PACKET3_SET_CONTEXT_REG_START) >> 2); - radeon_ring_write(rdev, gpu_addr >> 8); - radeon_ring_write(rdev, 2); - radeon_ring_write(rdev, 0); + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 3)); + radeon_ring_write(ring, (SQ_PGM_START_VS - PACKET3_SET_CONTEXT_REG_START) >> 2); + radeon_ring_write(ring, gpu_addr >> 8); + radeon_ring_write(ring, 2); + radeon_ring_write(ring, 0); /* PS */ gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.ps_offset; - radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 4)); - radeon_ring_write(rdev, (SQ_PGM_START_PS - PACKET3_SET_CONTEXT_REG_START) >> 2); - radeon_ring_write(rdev, gpu_addr >> 8); - radeon_ring_write(rdev, 1); - radeon_ring_write(rdev, 0); - radeon_ring_write(rdev, 2); + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 4)); + radeon_ring_write(ring, (SQ_PGM_START_PS - PACKET3_SET_CONTEXT_REG_START) >> 2); + radeon_ring_write(ring, gpu_addr >> 8); + radeon_ring_write(ring, 1); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, 2); gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset; cp_set_surface_sync(rdev, PACKET3_SH_ACTION_ENA, 512, gpu_addr); @@ -141,6 +144,7 @@ set_shaders(struct radeon_device *rdev) static void set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr) { + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; u32 sq_vtx_constant_word2, sq_vtx_constant_word3; /* high addr, stride */ @@ -155,16 +159,16 @@ set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr) SQ_VTCX_SEL_Z(SQ_SEL_Z) | SQ_VTCX_SEL_W(SQ_SEL_W); - radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 8)); - radeon_ring_write(rdev, 0x580); - radeon_ring_write(rdev, gpu_addr & 0xffffffff); - radeon_ring_write(rdev, 48 - 1); /* size */ - radeon_ring_write(rdev, sq_vtx_constant_word2); - radeon_ring_write(rdev, sq_vtx_constant_word3); - radeon_ring_write(rdev, 0); - radeon_ring_write(rdev, 0); - radeon_ring_write(rdev, 0); - radeon_ring_write(rdev, S__SQ_CONSTANT_TYPE(SQ_TEX_VTX_VALID_BUFFER)); + radeon_ring_write(ring, PACKET3(PACKET3_SET_RESOURCE, 8)); + radeon_ring_write(ring, 0x580); + radeon_ring_write(ring, gpu_addr & 0xffffffff); + radeon_ring_write(ring, 48 - 1); /* size */ + radeon_ring_write(ring, sq_vtx_constant_word2); + radeon_ring_write(ring, sq_vtx_constant_word3); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, S__SQ_CONSTANT_TYPE(SQ_TEX_VTX_VALID_BUFFER)); if ((rdev->family == CHIP_CEDAR) || (rdev->family == CHIP_PALM) || @@ -185,6 +189,7 @@ set_tex_resource(struct radeon_device *rdev, int format, int w, int h, int pitch, u64 gpu_addr, u32 size) { + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; u32 sq_tex_resource_word0, sq_tex_resource_word1; u32 sq_tex_resource_word4, sq_tex_resource_word7; @@ -208,16 +213,16 @@ set_tex_resource(struct radeon_device *rdev, cp_set_surface_sync(rdev, PACKET3_TC_ACTION_ENA, size, gpu_addr); - radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 8)); - radeon_ring_write(rdev, 0); - radeon_ring_write(rdev, sq_tex_resource_word0); - radeon_ring_write(rdev, sq_tex_resource_word1); - radeon_ring_write(rdev, gpu_addr >> 8); - radeon_ring_write(rdev, gpu_addr >> 8); - radeon_ring_write(rdev, sq_tex_resource_word4); - radeon_ring_write(rdev, 0); - radeon_ring_write(rdev, 0); - radeon_ring_write(rdev, sq_tex_resource_word7); + radeon_ring_write(ring, PACKET3(PACKET3_SET_RESOURCE, 8)); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, sq_tex_resource_word0); + radeon_ring_write(ring, sq_tex_resource_word1); + radeon_ring_write(ring, gpu_addr >> 8); + radeon_ring_write(ring, gpu_addr >> 8); + radeon_ring_write(ring, sq_tex_resource_word4); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, sq_tex_resource_word7); } /* emits 12 */ @@ -225,6 +230,7 @@ static void set_scissors(struct radeon_device *rdev, int x1, int y1, int x2, int y2) { + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; /* workaround some hw bugs */ if (x2 == 0) x1 = 1; @@ -235,43 +241,44 @@ set_scissors(struct radeon_device *rdev, int x1, int y1, x2 = 2; } - radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); - radeon_ring_write(rdev, (PA_SC_SCREEN_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2); - radeon_ring_write(rdev, (x1 << 0) | (y1 << 16)); - radeon_ring_write(rdev, (x2 << 0) | (y2 << 16)); + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); + radeon_ring_write(ring, (PA_SC_SCREEN_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2); + radeon_ring_write(ring, (x1 << 0) | (y1 << 16)); + radeon_ring_write(ring, (x2 << 0) | (y2 << 16)); - radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); - radeon_ring_write(rdev, (PA_SC_GENERIC_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2); - radeon_ring_write(rdev, (x1 << 0) | (y1 << 16) | (1 << 31)); - radeon_ring_write(rdev, (x2 << 0) | (y2 << 16)); + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); + radeon_ring_write(ring, (PA_SC_GENERIC_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2); + radeon_ring_write(ring, (x1 << 0) | (y1 << 16) | (1 << 31)); + radeon_ring_write(ring, (x2 << 0) | (y2 << 16)); - radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); - radeon_ring_write(rdev, (PA_SC_WINDOW_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2); - radeon_ring_write(rdev, (x1 << 0) | (y1 << 16) | (1 << 31)); - radeon_ring_write(rdev, (x2 << 0) | (y2 << 16)); + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); + radeon_ring_write(ring, (PA_SC_WINDOW_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2); + radeon_ring_write(ring, (x1 << 0) | (y1 << 16) | (1 << 31)); + radeon_ring_write(ring, (x2 << 0) | (y2 << 16)); } /* emits 10 */ static void draw_auto(struct radeon_device *rdev) { - radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1)); - radeon_ring_write(rdev, (VGT_PRIMITIVE_TYPE - PACKET3_SET_CONFIG_REG_START) >> 2); - radeon_ring_write(rdev, DI_PT_RECTLIST); + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + radeon_ring_write(ring, (VGT_PRIMITIVE_TYPE - PACKET3_SET_CONFIG_REG_START) >> 2); + radeon_ring_write(ring, DI_PT_RECTLIST); - radeon_ring_write(rdev, PACKET3(PACKET3_INDEX_TYPE, 0)); - radeon_ring_write(rdev, + radeon_ring_write(ring, PACKET3(PACKET3_INDEX_TYPE, 0)); + radeon_ring_write(ring, #ifdef __BIG_ENDIAN (2 << 2) | #endif DI_INDEX_SIZE_16_BIT); - radeon_ring_write(rdev, PACKET3(PACKET3_NUM_INSTANCES, 0)); - radeon_ring_write(rdev, 1); + radeon_ring_write(ring, PACKET3(PACKET3_NUM_INSTANCES, 0)); + radeon_ring_write(ring, 1); - radeon_ring_write(rdev, PACKET3(PACKET3_DRAW_INDEX_AUTO, 1)); - radeon_ring_write(rdev, 3); - radeon_ring_write(rdev, DI_SRC_SEL_AUTO_INDEX); + radeon_ring_write(ring, PACKET3(PACKET3_DRAW_INDEX_AUTO, 1)); + radeon_ring_write(ring, 3); + radeon_ring_write(ring, DI_SRC_SEL_AUTO_INDEX); } @@ -279,6 +286,7 @@ draw_auto(struct radeon_device *rdev) static void set_default_state(struct radeon_device *rdev) { + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2, sq_gpr_resource_mgmt_3; u32 sq_thread_resource_mgmt, sq_thread_resource_mgmt_2; u32 sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2, sq_stack_resource_mgmt_3; @@ -292,8 +300,8 @@ set_default_state(struct radeon_device *rdev) int dwords; /* set clear context state */ - radeon_ring_write(rdev, PACKET3(PACKET3_CLEAR_STATE, 0)); - radeon_ring_write(rdev, 0); + radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); + radeon_ring_write(ring, 0); if (rdev->family < CHIP_CAYMAN) { switch (rdev->family) { @@ -550,60 +558,60 @@ set_default_state(struct radeon_device *rdev) NUM_LS_STACK_ENTRIES(num_ls_stack_entries)); /* disable dyn gprs */ - radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1)); - radeon_ring_write(rdev, (SQ_DYN_GPR_CNTL_PS_FLUSH_REQ - PACKET3_SET_CONFIG_REG_START) >> 2); - radeon_ring_write(rdev, 0); + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + radeon_ring_write(ring, (SQ_DYN_GPR_CNTL_PS_FLUSH_REQ - PACKET3_SET_CONFIG_REG_START) >> 2); + radeon_ring_write(ring, 0); /* setup LDS */ - radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1)); - radeon_ring_write(rdev, (SQ_LDS_RESOURCE_MGMT - PACKET3_SET_CONFIG_REG_START) >> 2); - radeon_ring_write(rdev, 0x10001000); + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + radeon_ring_write(ring, (SQ_LDS_RESOURCE_MGMT - PACKET3_SET_CONFIG_REG_START) >> 2); + radeon_ring_write(ring, 0x10001000); /* SQ config */ - radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 11)); - radeon_ring_write(rdev, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_START) >> 2); - radeon_ring_write(rdev, sq_config); - radeon_ring_write(rdev, sq_gpr_resource_mgmt_1); - radeon_ring_write(rdev, sq_gpr_resource_mgmt_2); - radeon_ring_write(rdev, sq_gpr_resource_mgmt_3); - radeon_ring_write(rdev, 0); - radeon_ring_write(rdev, 0); - radeon_ring_write(rdev, sq_thread_resource_mgmt); - radeon_ring_write(rdev, sq_thread_resource_mgmt_2); - radeon_ring_write(rdev, sq_stack_resource_mgmt_1); - radeon_ring_write(rdev, sq_stack_resource_mgmt_2); - radeon_ring_write(rdev, sq_stack_resource_mgmt_3); + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 11)); + radeon_ring_write(ring, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_START) >> 2); + radeon_ring_write(ring, sq_config); + radeon_ring_write(ring, sq_gpr_resource_mgmt_1); + radeon_ring_write(ring, sq_gpr_resource_mgmt_2); + radeon_ring_write(ring, sq_gpr_resource_mgmt_3); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, sq_thread_resource_mgmt); + radeon_ring_write(ring, sq_thread_resource_mgmt_2); + radeon_ring_write(ring, sq_stack_resource_mgmt_1); + radeon_ring_write(ring, sq_stack_resource_mgmt_2); + radeon_ring_write(ring, sq_stack_resource_mgmt_3); } /* CONTEXT_CONTROL */ - radeon_ring_write(rdev, 0xc0012800); - radeon_ring_write(rdev, 0x80000000); - radeon_ring_write(rdev, 0x80000000); + radeon_ring_write(ring, 0xc0012800); + radeon_ring_write(ring, 0x80000000); + radeon_ring_write(ring, 0x80000000); /* SQ_VTX_BASE_VTX_LOC */ - radeon_ring_write(rdev, 0xc0026f00); - radeon_ring_write(rdev, 0x00000000); - radeon_ring_write(rdev, 0x00000000); - radeon_ring_write(rdev, 0x00000000); + radeon_ring_write(ring, 0xc0026f00); + radeon_ring_write(ring, 0x00000000); + radeon_ring_write(ring, 0x00000000); + radeon_ring_write(ring, 0x00000000); /* SET_SAMPLER */ - radeon_ring_write(rdev, 0xc0036e00); - radeon_ring_write(rdev, 0x00000000); - radeon_ring_write(rdev, 0x00000012); - radeon_ring_write(rdev, 0x00000000); - radeon_ring_write(rdev, 0x00000000); + radeon_ring_write(ring, 0xc0036e00); + radeon_ring_write(ring, 0x00000000); + radeon_ring_write(ring, 0x00000012); + radeon_ring_write(ring, 0x00000000); + radeon_ring_write(ring, 0x00000000); /* set to DX10/11 mode */ - radeon_ring_write(rdev, PACKET3(PACKET3_MODE_CONTROL, 0)); - radeon_ring_write(rdev, 1); + radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0)); + radeon_ring_write(ring, 1); /* emit an IB pointing at default state */ dwords = ALIGN(rdev->r600_blit.state_len, 0x10); gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.state_offset; - radeon_ring_write(rdev, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); - radeon_ring_write(rdev, gpu_addr & 0xFFFFFFFC); - radeon_ring_write(rdev, upper_32_bits(gpu_addr) & 0xFF); - radeon_ring_write(rdev, dwords); + radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); + radeon_ring_write(ring, gpu_addr & 0xFFFFFFFC); + radeon_ring_write(ring, upper_32_bits(gpu_addr) & 0xFF); + radeon_ring_write(ring, dwords); } diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c index cd4590aae154..f7442e62c03f 100644 --- a/drivers/gpu/drm/radeon/evergreen_cs.c +++ b/drivers/gpu/drm/radeon/evergreen_cs.c @@ -520,7 +520,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) break; case DB_Z_INFO: track->db_z_info = radeon_get_ib_value(p, idx); - if (!p->keep_tiling_flags) { + if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { r = evergreen_cs_packet_next_reloc(p, &reloc); if (r) { dev_warn(p->dev, "bad SET_CONTEXT_REG " @@ -649,7 +649,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case CB_COLOR7_INFO: tmp = (reg - CB_COLOR0_INFO) / 0x3c; track->cb_color_info[tmp] = radeon_get_ib_value(p, idx); - if (!p->keep_tiling_flags) { + if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { r = evergreen_cs_packet_next_reloc(p, &reloc); if (r) { dev_warn(p->dev, "bad SET_CONTEXT_REG " @@ -666,7 +666,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case CB_COLOR11_INFO: tmp = ((reg - CB_COLOR8_INFO) / 0x1c) + 8; track->cb_color_info[tmp] = radeon_get_ib_value(p, idx); - if (!p->keep_tiling_flags) { + if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { r = evergreen_cs_packet_next_reloc(p, &reloc); if (r) { dev_warn(p->dev, "bad SET_CONTEXT_REG " @@ -1355,7 +1355,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, return -EINVAL; } ib[idx+1+(i*8)+2] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); - if (!p->keep_tiling_flags) { + if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { ib[idx+1+(i*8)+1] |= TEX_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags)); if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) { @@ -1572,3 +1572,241 @@ int evergreen_cs_parse(struct radeon_cs_parser *p) return 0; } +/* vm parser */ +static bool evergreen_vm_reg_valid(u32 reg) +{ + /* context regs are fine */ + if (reg >= 0x28000) + return true; + + /* check config regs */ + switch (reg) { + case GRBM_GFX_INDEX: + case VGT_VTX_VECT_EJECT_REG: + case VGT_CACHE_INVALIDATION: + case VGT_GS_VERTEX_REUSE: + case VGT_PRIMITIVE_TYPE: + case VGT_INDEX_TYPE: + case VGT_NUM_INDICES: + case VGT_NUM_INSTANCES: + case VGT_COMPUTE_DIM_X: + case VGT_COMPUTE_DIM_Y: + case VGT_COMPUTE_DIM_Z: + case VGT_COMPUTE_START_X: + case VGT_COMPUTE_START_Y: + case VGT_COMPUTE_START_Z: + case VGT_COMPUTE_INDEX: + case VGT_COMPUTE_THREAD_GROUP_SIZE: + case VGT_HS_OFFCHIP_PARAM: + case PA_CL_ENHANCE: + case PA_SU_LINE_STIPPLE_VALUE: + case PA_SC_LINE_STIPPLE_STATE: + case PA_SC_ENHANCE: + case SQ_DYN_GPR_CNTL_PS_FLUSH_REQ: + case SQ_DYN_GPR_SIMD_LOCK_EN: + case SQ_CONFIG: + case SQ_GPR_RESOURCE_MGMT_1: + case SQ_GLOBAL_GPR_RESOURCE_MGMT_1: + case SQ_GLOBAL_GPR_RESOURCE_MGMT_2: + case SQ_CONST_MEM_BASE: + case SQ_STATIC_THREAD_MGMT_1: + case SQ_STATIC_THREAD_MGMT_2: + case SQ_STATIC_THREAD_MGMT_3: + case SPI_CONFIG_CNTL: + case SPI_CONFIG_CNTL_1: + case TA_CNTL_AUX: + case DB_DEBUG: + case DB_DEBUG2: + case DB_DEBUG3: + case DB_DEBUG4: + case DB_WATERMARKS: + case TD_PS_BORDER_COLOR_INDEX: + case TD_PS_BORDER_COLOR_RED: + case TD_PS_BORDER_COLOR_GREEN: + case TD_PS_BORDER_COLOR_BLUE: + case TD_PS_BORDER_COLOR_ALPHA: + case TD_VS_BORDER_COLOR_INDEX: + case TD_VS_BORDER_COLOR_RED: + case TD_VS_BORDER_COLOR_GREEN: + case TD_VS_BORDER_COLOR_BLUE: + case TD_VS_BORDER_COLOR_ALPHA: + case TD_GS_BORDER_COLOR_INDEX: + case TD_GS_BORDER_COLOR_RED: + case TD_GS_BORDER_COLOR_GREEN: + case TD_GS_BORDER_COLOR_BLUE: + case TD_GS_BORDER_COLOR_ALPHA: + case TD_HS_BORDER_COLOR_INDEX: + case TD_HS_BORDER_COLOR_RED: + case TD_HS_BORDER_COLOR_GREEN: + case TD_HS_BORDER_COLOR_BLUE: + case TD_HS_BORDER_COLOR_ALPHA: + case TD_LS_BORDER_COLOR_INDEX: + case TD_LS_BORDER_COLOR_RED: + case TD_LS_BORDER_COLOR_GREEN: + case TD_LS_BORDER_COLOR_BLUE: + case TD_LS_BORDER_COLOR_ALPHA: + case TD_CS_BORDER_COLOR_INDEX: + case TD_CS_BORDER_COLOR_RED: + case TD_CS_BORDER_COLOR_GREEN: + case TD_CS_BORDER_COLOR_BLUE: + case TD_CS_BORDER_COLOR_ALPHA: + case SQ_ESGS_RING_SIZE: + case SQ_GSVS_RING_SIZE: + case SQ_ESTMP_RING_SIZE: + case SQ_GSTMP_RING_SIZE: + case SQ_HSTMP_RING_SIZE: + case SQ_LSTMP_RING_SIZE: + case SQ_PSTMP_RING_SIZE: + case SQ_VSTMP_RING_SIZE: + case SQ_ESGS_RING_ITEMSIZE: + case SQ_ESTMP_RING_ITEMSIZE: + case SQ_GSTMP_RING_ITEMSIZE: + case SQ_GSVS_RING_ITEMSIZE: + case SQ_GS_VERT_ITEMSIZE: + case SQ_GS_VERT_ITEMSIZE_1: + case SQ_GS_VERT_ITEMSIZE_2: + case SQ_GS_VERT_ITEMSIZE_3: + case SQ_GSVS_RING_OFFSET_1: + case SQ_GSVS_RING_OFFSET_2: + case SQ_GSVS_RING_OFFSET_3: + case SQ_HSTMP_RING_ITEMSIZE: + case SQ_LSTMP_RING_ITEMSIZE: + case SQ_PSTMP_RING_ITEMSIZE: + case SQ_VSTMP_RING_ITEMSIZE: + case VGT_TF_RING_SIZE: + case SQ_ESGS_RING_BASE: + case SQ_GSVS_RING_BASE: + case SQ_ESTMP_RING_BASE: + case SQ_GSTMP_RING_BASE: + case SQ_HSTMP_RING_BASE: + case SQ_LSTMP_RING_BASE: + case SQ_PSTMP_RING_BASE: + case SQ_VSTMP_RING_BASE: + case CAYMAN_VGT_OFFCHIP_LDS_BASE: + case CAYMAN_SQ_EX_ALLOC_TABLE_SLOTS: + return true; + default: + return false; + } +} + +static int evergreen_vm_packet3_check(struct radeon_device *rdev, + u32 *ib, struct radeon_cs_packet *pkt) +{ + u32 idx = pkt->idx + 1; + u32 idx_value = ib[idx]; + u32 start_reg, end_reg, reg, i; + + switch (pkt->opcode) { + case PACKET3_NOP: + case PACKET3_SET_BASE: + case PACKET3_CLEAR_STATE: + case PACKET3_INDEX_BUFFER_SIZE: + case PACKET3_DISPATCH_DIRECT: + case PACKET3_DISPATCH_INDIRECT: + case PACKET3_MODE_CONTROL: + case PACKET3_SET_PREDICATION: + case PACKET3_COND_EXEC: + case PACKET3_PRED_EXEC: + case PACKET3_DRAW_INDIRECT: + case PACKET3_DRAW_INDEX_INDIRECT: + case PACKET3_INDEX_BASE: + case PACKET3_DRAW_INDEX_2: + case PACKET3_CONTEXT_CONTROL: + case PACKET3_DRAW_INDEX_OFFSET: + case PACKET3_INDEX_TYPE: + case PACKET3_DRAW_INDEX: + case PACKET3_DRAW_INDEX_AUTO: + case PACKET3_DRAW_INDEX_IMMD: + case PACKET3_NUM_INSTANCES: + case PACKET3_DRAW_INDEX_MULTI_AUTO: + case PACKET3_STRMOUT_BUFFER_UPDATE: + case PACKET3_DRAW_INDEX_OFFSET_2: + case PACKET3_DRAW_INDEX_MULTI_ELEMENT: + case PACKET3_MPEG_INDEX: + case PACKET3_WAIT_REG_MEM: + case PACKET3_MEM_WRITE: + case PACKET3_SURFACE_SYNC: + case PACKET3_EVENT_WRITE: + case PACKET3_EVENT_WRITE_EOP: + case PACKET3_EVENT_WRITE_EOS: + case PACKET3_SET_CONTEXT_REG: + case PACKET3_SET_BOOL_CONST: + case PACKET3_SET_LOOP_CONST: + case PACKET3_SET_RESOURCE: + case PACKET3_SET_SAMPLER: + case PACKET3_SET_CTL_CONST: + case PACKET3_SET_RESOURCE_OFFSET: + case PACKET3_SET_CONTEXT_REG_INDIRECT: + case PACKET3_SET_RESOURCE_INDIRECT: + case CAYMAN_PACKET3_DEALLOC_STATE: + break; + case PACKET3_COND_WRITE: + if (idx_value & 0x100) { + reg = ib[idx + 5] * 4; + if (!evergreen_vm_reg_valid(reg)) + return -EINVAL; + } + break; + case PACKET3_COPY_DW: + if (idx_value & 0x2) { + reg = ib[idx + 3] * 4; + if (!evergreen_vm_reg_valid(reg)) + return -EINVAL; + } + break; + case PACKET3_SET_CONFIG_REG: + start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START; + end_reg = 4 * pkt->count + start_reg - 4; + if ((start_reg < PACKET3_SET_CONFIG_REG_START) || + (start_reg >= PACKET3_SET_CONFIG_REG_END) || + (end_reg >= PACKET3_SET_CONFIG_REG_END)) { + DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n"); + return -EINVAL; + } + for (i = 0; i < pkt->count; i++) { + reg = start_reg + (4 * i); + if (!evergreen_vm_reg_valid(reg)) + return -EINVAL; + } + break; + default: + return -EINVAL; + } + return 0; +} + +int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib) +{ + int ret = 0; + u32 idx = 0; + struct radeon_cs_packet pkt; + + do { + pkt.idx = idx; + pkt.type = CP_PACKET_GET_TYPE(ib->ptr[idx]); + pkt.count = CP_PACKET_GET_COUNT(ib->ptr[idx]); + pkt.one_reg_wr = 0; + switch (pkt.type) { + case PACKET_TYPE0: + dev_err(rdev->dev, "Packet0 not allowed!\n"); + ret = -EINVAL; + break; + case PACKET_TYPE2: + break; + case PACKET_TYPE3: + pkt.opcode = CP_PACKET3_GET_OPCODE(ib->ptr[idx]); + ret = evergreen_vm_packet3_check(rdev, ib->ptr, &pkt); + break; + default: + dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type); + ret = -EINVAL; + break; + } + if (ret) + break; + idx += pkt.count + 2; + } while (idx < ib->length_dw); + + return ret; +} diff --git a/drivers/gpu/drm/radeon/evergreen_reg.h b/drivers/gpu/drm/radeon/evergreen_reg.h index 7d7f2155e34c..4215de95477e 100644 --- a/drivers/gpu/drm/radeon/evergreen_reg.h +++ b/drivers/gpu/drm/radeon/evergreen_reg.h @@ -35,6 +35,14 @@ #define EVERGREEN_P1PLL_SS_CNTL 0x414 #define EVERGREEN_P2PLL_SS_CNTL 0x454 # define EVERGREEN_PxPLL_SS_EN (1 << 12) + +#define EVERGREEN_AUDIO_PLL1_MUL 0x5b0 +#define EVERGREEN_AUDIO_PLL1_DIV 0x5b4 +#define EVERGREEN_AUDIO_PLL1_UNK 0x5bc + +#define EVERGREEN_AUDIO_ENABLE 0x5e78 +#define EVERGREEN_AUDIO_VENDOR_ID 0x5ec0 + /* GRPH blocks at 0x6800, 0x7400, 0x10000, 0x10c00, 0x11800, 0x12400 */ #define EVERGREEN_GRPH_ENABLE 0x6800 #define EVERGREEN_GRPH_CONTROL 0x6804 @@ -220,4 +228,9 @@ #define EVERGREEN_DC_GPIO_HPD_EN 0x64b8 #define EVERGREEN_DC_GPIO_HPD_Y 0x64bc +/* HDMI blocks at 0x7030, 0x7c30, 0x10830, 0x11430, 0x12030, 0x12c30 */ +#define EVERGREEN_HDMI_BASE 0x7030 + +#define EVERGREEN_HDMI_CONFIG_OFFSET 0xf0 + #endif diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h index e00039e59a75..b502216d42af 100644 --- a/drivers/gpu/drm/radeon/evergreend.h +++ b/drivers/gpu/drm/radeon/evergreend.h @@ -242,6 +242,7 @@ #define PA_CL_ENHANCE 0x8A14 #define CLIP_VTX_REORDER_ENA (1 << 0) #define NUM_CLIP_SEQ(x) ((x) << 1) +#define PA_SC_ENHANCE 0x8BF0 #define PA_SC_AA_CONFIG 0x28C04 #define MSAA_NUM_SAMPLES_SHIFT 0 #define MSAA_NUM_SAMPLES_MASK 0x3 @@ -319,6 +320,8 @@ #define SQ_GPR_RESOURCE_MGMT_3 0x8C0C #define NUM_HS_GPRS(x) ((x) << 0) #define NUM_LS_GPRS(x) ((x) << 16) +#define SQ_GLOBAL_GPR_RESOURCE_MGMT_1 0x8C10 +#define SQ_GLOBAL_GPR_RESOURCE_MGMT_2 0x8C14 #define SQ_THREAD_RESOURCE_MGMT 0x8C18 #define NUM_PS_THREADS(x) ((x) << 0) #define NUM_VS_THREADS(x) ((x) << 8) @@ -337,6 +340,10 @@ #define NUM_HS_STACK_ENTRIES(x) ((x) << 0) #define NUM_LS_STACK_ENTRIES(x) ((x) << 16) #define SQ_DYN_GPR_CNTL_PS_FLUSH_REQ 0x8D8C +#define SQ_DYN_GPR_SIMD_LOCK_EN 0x8D94 +#define SQ_STATIC_THREAD_MGMT_1 0x8E20 +#define SQ_STATIC_THREAD_MGMT_2 0x8E24 +#define SQ_STATIC_THREAD_MGMT_3 0x8E28 #define SQ_LDS_RESOURCE_MGMT 0x8E2C #define SQ_MS_FIFO_SIZES 0x8CF0 @@ -691,6 +698,7 @@ #define PACKET3_DRAW_INDEX_MULTI_ELEMENT 0x36 #define PACKET3_MEM_SEMAPHORE 0x39 #define PACKET3_MPEG_INDEX 0x3A +#define PACKET3_COPY_DW 0x3B #define PACKET3_WAIT_REG_MEM 0x3C #define PACKET3_MEM_WRITE 0x3D #define PACKET3_INDIRECT_BUFFER 0x32 @@ -768,6 +776,8 @@ #define SQ_TEX_VTX_VALID_TEXTURE 0x2 #define SQ_TEX_VTX_VALID_BUFFER 0x3 +#define VGT_VTX_VECT_EJECT_REG 0x88b0 + #define SQ_CONST_MEM_BASE 0x8df8 #define SQ_ESGS_RING_BASE 0x8c40 @@ -892,8 +902,27 @@ #define PA_SC_SCREEN_SCISSOR_TL 0x28030 #define PA_SC_GENERIC_SCISSOR_TL 0x28240 #define PA_SC_WINDOW_SCISSOR_TL 0x28204 -#define VGT_PRIMITIVE_TYPE 0x8958 +#define VGT_PRIMITIVE_TYPE 0x8958 +#define VGT_INDEX_TYPE 0x895C + +#define VGT_NUM_INDICES 0x8970 + +#define VGT_COMPUTE_DIM_X 0x8990 +#define VGT_COMPUTE_DIM_Y 0x8994 +#define VGT_COMPUTE_DIM_Z 0x8998 +#define VGT_COMPUTE_START_X 0x899C +#define VGT_COMPUTE_START_Y 0x89A0 +#define VGT_COMPUTE_START_Z 0x89A4 +#define VGT_COMPUTE_INDEX 0x89A8 +#define VGT_COMPUTE_THREAD_GROUP_SIZE 0x89AC +#define VGT_HS_OFFCHIP_PARAM 0x89B0 + +#define DB_DEBUG 0x9830 +#define DB_DEBUG2 0x9834 +#define DB_DEBUG3 0x9838 +#define DB_DEBUG4 0x983C +#define DB_WATERMARKS 0x9854 #define DB_DEPTH_CONTROL 0x28800 #define DB_DEPTH_VIEW 0x28008 #define DB_HTILE_DATA_BASE 0x28014 @@ -1189,8 +1218,40 @@ #define SQ_VTX_CONSTANT_WORD6_0 0x30018 #define SQ_VTX_CONSTANT_WORD7_0 0x3001c +#define TD_PS_BORDER_COLOR_INDEX 0xA400 +#define TD_PS_BORDER_COLOR_RED 0xA404 +#define TD_PS_BORDER_COLOR_GREEN 0xA408 +#define TD_PS_BORDER_COLOR_BLUE 0xA40C +#define TD_PS_BORDER_COLOR_ALPHA 0xA410 +#define TD_VS_BORDER_COLOR_INDEX 0xA414 +#define TD_VS_BORDER_COLOR_RED 0xA418 +#define TD_VS_BORDER_COLOR_GREEN 0xA41C +#define TD_VS_BORDER_COLOR_BLUE 0xA420 +#define TD_VS_BORDER_COLOR_ALPHA 0xA424 +#define TD_GS_BORDER_COLOR_INDEX 0xA428 +#define TD_GS_BORDER_COLOR_RED 0xA42C +#define TD_GS_BORDER_COLOR_GREEN 0xA430 +#define TD_GS_BORDER_COLOR_BLUE 0xA434 +#define TD_GS_BORDER_COLOR_ALPHA 0xA438 +#define TD_HS_BORDER_COLOR_INDEX 0xA43C +#define TD_HS_BORDER_COLOR_RED 0xA440 +#define TD_HS_BORDER_COLOR_GREEN 0xA444 +#define TD_HS_BORDER_COLOR_BLUE 0xA448 +#define TD_HS_BORDER_COLOR_ALPHA 0xA44C +#define TD_LS_BORDER_COLOR_INDEX 0xA450 +#define TD_LS_BORDER_COLOR_RED 0xA454 +#define TD_LS_BORDER_COLOR_GREEN 0xA458 +#define TD_LS_BORDER_COLOR_BLUE 0xA45C +#define TD_LS_BORDER_COLOR_ALPHA 0xA460 +#define TD_CS_BORDER_COLOR_INDEX 0xA464 +#define TD_CS_BORDER_COLOR_RED 0xA468 +#define TD_CS_BORDER_COLOR_GREEN 0xA46C +#define TD_CS_BORDER_COLOR_BLUE 0xA470 +#define TD_CS_BORDER_COLOR_ALPHA 0xA474 + /* cayman 3D regs */ -#define CAYMAN_VGT_OFFCHIP_LDS_BASE 0x89B0 +#define CAYMAN_VGT_OFFCHIP_LDS_BASE 0x89B4 +#define CAYMAN_SQ_EX_ALLOC_TABLE_SLOTS 0x8E48 #define CAYMAN_DB_EQAA 0x28804 #define CAYMAN_DB_DEPTH_INFO 0x2803C #define CAYMAN_PA_SC_AA_CONFIG 0x28BE0 diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 0e5799857465..321137295400 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -934,7 +934,7 @@ void cayman_pcie_gart_tlb_flush(struct radeon_device *rdev) int cayman_pcie_gart_enable(struct radeon_device *rdev) { - int r; + int i, r; if (rdev->gart.robj == NULL) { dev_err(rdev->dev, "No VRAM object for PCIE GART.\n"); @@ -945,9 +945,12 @@ int cayman_pcie_gart_enable(struct radeon_device *rdev) return r; radeon_gart_restore(rdev); /* Setup TLB control */ - WREG32(MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB | + WREG32(MC_VM_MX_L1_TLB_CNTL, + (0xA << 7) | + ENABLE_L1_TLB | ENABLE_L1_FRAGMENT_PROCESSING | SYSTEM_ACCESS_MODE_NOT_IN_SYS | + ENABLE_ADVANCED_DRIVER_MODEL | SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU); /* Setup L2 cache */ WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | @@ -967,9 +970,26 @@ int cayman_pcie_gart_enable(struct radeon_device *rdev) WREG32(VM_CONTEXT0_CNTL2, 0); WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) | RANGE_PROTECTION_FAULT_ENABLE_DEFAULT); - /* disable context1-7 */ + + WREG32(0x15D4, 0); + WREG32(0x15D8, 0); + WREG32(0x15DC, 0); + + /* empty context1-7 */ + for (i = 1; i < 8; i++) { + WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR + (i << 2), 0); + WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (i << 2), 0); + WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2), + rdev->gart.table_addr >> 12); + } + + /* enable context1-7 */ + WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR, + (u32)(rdev->dummy_page.addr >> 12)); WREG32(VM_CONTEXT1_CNTL2, 0); WREG32(VM_CONTEXT1_CNTL, 0); + WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) | + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT); cayman_pcie_gart_tlb_flush(rdev); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", @@ -1006,9 +1026,69 @@ void cayman_pcie_gart_fini(struct radeon_device *rdev) radeon_gart_fini(rdev); } +void cayman_cp_int_cntl_setup(struct radeon_device *rdev, + int ring, u32 cp_int_cntl) +{ + u32 srbm_gfx_cntl = RREG32(SRBM_GFX_CNTL) & ~3; + + WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl | (ring & 3)); + WREG32(CP_INT_CNTL, cp_int_cntl); +} + /* * CP. */ +void cayman_fence_ring_emit(struct radeon_device *rdev, + struct radeon_fence *fence) +{ + struct radeon_ring *ring = &rdev->ring[fence->ring]; + u64 addr = rdev->fence_drv[fence->ring].gpu_addr; + + /* flush read cache over gart for this vmid */ + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3)); + radeon_ring_write(ring, PACKET3_TC_ACTION_ENA | PACKET3_SH_ACTION_ENA); + radeon_ring_write(ring, 0xFFFFFFFF); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, 10); /* poll interval */ + /* EVENT_WRITE_EOP - flush caches, send int */ + radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4)); + radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_EVENT_TS) | EVENT_INDEX(5)); + radeon_ring_write(ring, addr & 0xffffffff); + radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2)); + radeon_ring_write(ring, fence->seq); + radeon_ring_write(ring, 0); +} + +void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) +{ + struct radeon_ring *ring = &rdev->ring[ib->fence->ring]; + + /* set to DX10/11 mode */ + radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0)); + radeon_ring_write(ring, 1); + radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); + radeon_ring_write(ring, +#ifdef __BIG_ENDIAN + (2 << 0) | +#endif + (ib->gpu_addr & 0xFFFFFFFC)); + radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFF); + radeon_ring_write(ring, ib->length_dw | (ib->vm_id << 24)); + + /* flush read cache over gart for this vmid */ + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2); + radeon_ring_write(ring, ib->vm_id); + radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3)); + radeon_ring_write(ring, PACKET3_TC_ACTION_ENA | PACKET3_SH_ACTION_ENA); + radeon_ring_write(ring, 0xFFFFFFFF); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, 10); /* poll interval */ +} + static void cayman_cp_enable(struct radeon_device *rdev, bool enable) { if (enable) @@ -1049,63 +1129,64 @@ static int cayman_cp_load_microcode(struct radeon_device *rdev) static int cayman_cp_start(struct radeon_device *rdev) { + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; int r, i; - r = radeon_ring_lock(rdev, 7); + r = radeon_ring_lock(rdev, ring, 7); if (r) { DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r); return r; } - radeon_ring_write(rdev, PACKET3(PACKET3_ME_INITIALIZE, 5)); - radeon_ring_write(rdev, 0x1); - radeon_ring_write(rdev, 0x0); - radeon_ring_write(rdev, rdev->config.cayman.max_hw_contexts - 1); - radeon_ring_write(rdev, PACKET3_ME_INITIALIZE_DEVICE_ID(1)); - radeon_ring_write(rdev, 0); - radeon_ring_write(rdev, 0); - radeon_ring_unlock_commit(rdev); + radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5)); + radeon_ring_write(ring, 0x1); + radeon_ring_write(ring, 0x0); + radeon_ring_write(ring, rdev->config.cayman.max_hw_contexts - 1); + radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1)); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, 0); + radeon_ring_unlock_commit(rdev, ring); cayman_cp_enable(rdev, true); - r = radeon_ring_lock(rdev, cayman_default_size + 19); + r = radeon_ring_lock(rdev, ring, cayman_default_size + 19); if (r) { DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r); return r; } /* setup clear context state */ - radeon_ring_write(rdev, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - radeon_ring_write(rdev, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); + radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); + radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); for (i = 0; i < cayman_default_size; i++) - radeon_ring_write(rdev, cayman_default_state[i]); + radeon_ring_write(ring, cayman_default_state[i]); - radeon_ring_write(rdev, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - radeon_ring_write(rdev, PACKET3_PREAMBLE_END_CLEAR_STATE); + radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); + radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); /* set clear context state */ - radeon_ring_write(rdev, PACKET3(PACKET3_CLEAR_STATE, 0)); - radeon_ring_write(rdev, 0); + radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); + radeon_ring_write(ring, 0); /* SQ_VTX_BASE_VTX_LOC */ - radeon_ring_write(rdev, 0xc0026f00); - radeon_ring_write(rdev, 0x00000000); - radeon_ring_write(rdev, 0x00000000); - radeon_ring_write(rdev, 0x00000000); + radeon_ring_write(ring, 0xc0026f00); + radeon_ring_write(ring, 0x00000000); + radeon_ring_write(ring, 0x00000000); + radeon_ring_write(ring, 0x00000000); /* Clear consts */ - radeon_ring_write(rdev, 0xc0036f00); - radeon_ring_write(rdev, 0x00000bc4); - radeon_ring_write(rdev, 0xffffffff); - radeon_ring_write(rdev, 0xffffffff); - radeon_ring_write(rdev, 0xffffffff); + radeon_ring_write(ring, 0xc0036f00); + radeon_ring_write(ring, 0x00000bc4); + radeon_ring_write(ring, 0xffffffff); + radeon_ring_write(ring, 0xffffffff); + radeon_ring_write(ring, 0xffffffff); - radeon_ring_write(rdev, 0xc0026900); - radeon_ring_write(rdev, 0x00000316); - radeon_ring_write(rdev, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */ - radeon_ring_write(rdev, 0x00000010); /* */ + radeon_ring_write(ring, 0xc0026900); + radeon_ring_write(ring, 0x00000316); + radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */ + radeon_ring_write(ring, 0x00000010); /* */ - radeon_ring_unlock_commit(rdev); + radeon_ring_unlock_commit(rdev, ring); /* XXX init other rings */ @@ -1115,11 +1196,12 @@ static int cayman_cp_start(struct radeon_device *rdev) static void cayman_cp_fini(struct radeon_device *rdev) { cayman_cp_enable(rdev, false); - radeon_ring_fini(rdev); + radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]); } int cayman_cp_resume(struct radeon_device *rdev) { + struct radeon_ring *ring; u32 tmp; u32 rb_bufsz; int r; @@ -1136,7 +1218,7 @@ int cayman_cp_resume(struct radeon_device *rdev) WREG32(GRBM_SOFT_RESET, 0); RREG32(GRBM_SOFT_RESET); - WREG32(CP_SEM_WAIT_TIMER, 0x4); + WREG32(CP_SEM_WAIT_TIMER, 0x0); /* Set the write pointer delay */ WREG32(CP_RB_WPTR_DELAY, 0); @@ -1145,7 +1227,8 @@ int cayman_cp_resume(struct radeon_device *rdev) /* ring 0 - compute and gfx */ /* Set ring buffer size */ - rb_bufsz = drm_order(rdev->cp.ring_size / 8); + ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; + rb_bufsz = drm_order(ring->ring_size / 8); tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz; #ifdef __BIG_ENDIAN tmp |= BUF_SWAP_32BIT; @@ -1154,8 +1237,8 @@ int cayman_cp_resume(struct radeon_device *rdev) /* Initialize the ring buffer's read and write pointers */ WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA); - rdev->cp.wptr = 0; - WREG32(CP_RB0_WPTR, rdev->cp.wptr); + ring->wptr = 0; + WREG32(CP_RB0_WPTR, ring->wptr); /* set the wb address wether it's enabled or not */ WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC); @@ -1172,13 +1255,14 @@ int cayman_cp_resume(struct radeon_device *rdev) mdelay(1); WREG32(CP_RB0_CNTL, tmp); - WREG32(CP_RB0_BASE, rdev->cp.gpu_addr >> 8); + WREG32(CP_RB0_BASE, ring->gpu_addr >> 8); - rdev->cp.rptr = RREG32(CP_RB0_RPTR); + ring->rptr = RREG32(CP_RB0_RPTR); /* ring1 - compute only */ /* Set ring buffer size */ - rb_bufsz = drm_order(rdev->cp1.ring_size / 8); + ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; + rb_bufsz = drm_order(ring->ring_size / 8); tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz; #ifdef __BIG_ENDIAN tmp |= BUF_SWAP_32BIT; @@ -1187,8 +1271,8 @@ int cayman_cp_resume(struct radeon_device *rdev) /* Initialize the ring buffer's read and write pointers */ WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA); - rdev->cp1.wptr = 0; - WREG32(CP_RB1_WPTR, rdev->cp1.wptr); + ring->wptr = 0; + WREG32(CP_RB1_WPTR, ring->wptr); /* set the wb address wether it's enabled or not */ WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC); @@ -1197,13 +1281,14 @@ int cayman_cp_resume(struct radeon_device *rdev) mdelay(1); WREG32(CP_RB1_CNTL, tmp); - WREG32(CP_RB1_BASE, rdev->cp1.gpu_addr >> 8); + WREG32(CP_RB1_BASE, ring->gpu_addr >> 8); - rdev->cp1.rptr = RREG32(CP_RB1_RPTR); + ring->rptr = RREG32(CP_RB1_RPTR); /* ring2 - compute only */ /* Set ring buffer size */ - rb_bufsz = drm_order(rdev->cp2.ring_size / 8); + ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; + rb_bufsz = drm_order(ring->ring_size / 8); tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz; #ifdef __BIG_ENDIAN tmp |= BUF_SWAP_32BIT; @@ -1212,8 +1297,8 @@ int cayman_cp_resume(struct radeon_device *rdev) /* Initialize the ring buffer's read and write pointers */ WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA); - rdev->cp2.wptr = 0; - WREG32(CP_RB2_WPTR, rdev->cp2.wptr); + ring->wptr = 0; + WREG32(CP_RB2_WPTR, ring->wptr); /* set the wb address wether it's enabled or not */ WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC); @@ -1222,28 +1307,28 @@ int cayman_cp_resume(struct radeon_device *rdev) mdelay(1); WREG32(CP_RB2_CNTL, tmp); - WREG32(CP_RB2_BASE, rdev->cp2.gpu_addr >> 8); + WREG32(CP_RB2_BASE, ring->gpu_addr >> 8); - rdev->cp2.rptr = RREG32(CP_RB2_RPTR); + ring->rptr = RREG32(CP_RB2_RPTR); /* start the rings */ cayman_cp_start(rdev); - rdev->cp.ready = true; - rdev->cp1.ready = true; - rdev->cp2.ready = true; + rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true; + rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false; + rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false; /* this only test cp0 */ - r = radeon_ring_test(rdev); + r = radeon_ring_test(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]); if (r) { - rdev->cp.ready = false; - rdev->cp1.ready = false; - rdev->cp2.ready = false; + rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; + rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false; + rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false; return r; } return 0; } -bool cayman_gpu_is_lockup(struct radeon_device *rdev) +bool cayman_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) { u32 srbm_status; u32 grbm_status; @@ -1256,20 +1341,20 @@ bool cayman_gpu_is_lockup(struct radeon_device *rdev) grbm_status_se0 = RREG32(GRBM_STATUS_SE0); grbm_status_se1 = RREG32(GRBM_STATUS_SE1); if (!(grbm_status & GUI_ACTIVE)) { - r100_gpu_lockup_update(lockup, &rdev->cp); + r100_gpu_lockup_update(lockup, ring); return false; } /* force CP activities */ - r = radeon_ring_lock(rdev, 2); + r = radeon_ring_lock(rdev, ring, 2); if (!r) { /* PACKET2 NOP */ - radeon_ring_write(rdev, 0x80000000); - radeon_ring_write(rdev, 0x80000000); - radeon_ring_unlock_commit(rdev); + radeon_ring_write(ring, 0x80000000); + radeon_ring_write(ring, 0x80000000); + radeon_ring_unlock_commit(rdev, ring); } /* XXX deal with CP0,1,2 */ - rdev->cp.rptr = RREG32(CP_RB0_RPTR); - return r100_gpu_cp_is_lockup(rdev, lockup, &rdev->cp); + ring->rptr = RREG32(ring->rptr_reg); + return r100_gpu_cp_is_lockup(rdev, lockup, ring); } static int cayman_gpu_soft_reset(struct radeon_device *rdev) @@ -1289,6 +1374,15 @@ static int cayman_gpu_soft_reset(struct radeon_device *rdev) RREG32(GRBM_STATUS_SE1)); dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n", RREG32(SRBM_STATUS)); + dev_info(rdev->dev, " VM_CONTEXT0_PROTECTION_FAULT_ADDR 0x%08X\n", + RREG32(0x14F8)); + dev_info(rdev->dev, " VM_CONTEXT0_PROTECTION_FAULT_STATUS 0x%08X\n", + RREG32(0x14D8)); + dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n", + RREG32(0x14FC)); + dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", + RREG32(0x14DC)); + evergreen_mc_stop(rdev, &save); if (evergreen_mc_wait_for_idle(rdev)) { dev_warn(rdev->dev, "Wait for MC idle timedout !\n"); @@ -1319,6 +1413,7 @@ static int cayman_gpu_soft_reset(struct radeon_device *rdev) (void)RREG32(GRBM_SOFT_RESET); /* Wait a little for things to settle down */ udelay(50); + dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n", RREG32(GRBM_STATUS)); dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n", @@ -1338,6 +1433,7 @@ int cayman_asic_reset(struct radeon_device *rdev) static int cayman_startup(struct radeon_device *rdev) { + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; int r; /* enable pcie gen2 link */ @@ -1378,6 +1474,24 @@ static int cayman_startup(struct radeon_device *rdev) if (r) return r; + r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX); + if (r) { + dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); + return r; + } + + r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX); + if (r) { + dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); + return r; + } + + r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX); + if (r) { + dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); + return r; + } + /* Enable IRQ */ r = r600_irq_init(rdev); if (r) { @@ -1387,7 +1501,9 @@ static int cayman_startup(struct radeon_device *rdev) } evergreen_irq_set(rdev); - r = radeon_ring_init(rdev, rdev->cp.ring_size); + r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, + CP_RB0_RPTR, CP_RB0_WPTR, + 0, 0xfffff, RADEON_CP_PACKET2); if (r) return r; r = cayman_cp_load_microcode(rdev); @@ -1397,6 +1513,21 @@ static int cayman_startup(struct radeon_device *rdev) if (r) return r; + r = radeon_ib_pool_start(rdev); + if (r) + return r; + + r = r600_ib_test(rdev, RADEON_RING_TYPE_GFX_INDEX); + if (r) { + DRM_ERROR("radeon: failed testing IB (%d).\n", r); + rdev->accel_working = false; + return r; + } + + r = radeon_vm_manager_start(rdev); + if (r) + return r; + return 0; } @@ -1411,32 +1542,26 @@ int cayman_resume(struct radeon_device *rdev) /* post card */ atom_asic_init(rdev->mode_info.atom_context); + rdev->accel_working = true; r = cayman_startup(rdev); if (r) { DRM_ERROR("cayman startup failed on resume\n"); return r; } - - r = r600_ib_test(rdev); - if (r) { - DRM_ERROR("radeon: failled testing IB (%d).\n", r); - return r; - } - return r; - } int cayman_suspend(struct radeon_device *rdev) { /* FIXME: we should wait for ring to be empty */ + radeon_ib_pool_suspend(rdev); + radeon_vm_manager_suspend(rdev); + r600_blit_suspend(rdev); cayman_cp_enable(rdev, false); - rdev->cp.ready = false; + rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; evergreen_irq_suspend(rdev); radeon_wb_disable(rdev); cayman_pcie_gart_disable(rdev); - r600_blit_suspend(rdev); - return 0; } @@ -1448,6 +1573,7 @@ int cayman_suspend(struct radeon_device *rdev) */ int cayman_init(struct radeon_device *rdev) { + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; int r; /* This don't do much */ @@ -1500,8 +1626,8 @@ int cayman_init(struct radeon_device *rdev) if (r) return r; - rdev->cp.ring_obj = NULL; - r600_ring_init(rdev, 1024 * 1024); + ring->ring_obj = NULL; + r600_ring_init(rdev, ring, 1024 * 1024); rdev->ih.ring_obj = NULL; r600_ih_ring_init(rdev, 64 * 1024); @@ -1510,29 +1636,29 @@ int cayman_init(struct radeon_device *rdev) if (r) return r; + r = radeon_ib_pool_init(rdev); rdev->accel_working = true; + if (r) { + dev_err(rdev->dev, "IB initialization failed (%d).\n", r); + rdev->accel_working = false; + } + r = radeon_vm_manager_init(rdev); + if (r) { + dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r); + } + r = cayman_startup(rdev); if (r) { dev_err(rdev->dev, "disabling GPU acceleration\n"); cayman_cp_fini(rdev); r600_irq_fini(rdev); radeon_wb_fini(rdev); + r100_ib_fini(rdev); + radeon_vm_manager_fini(rdev); radeon_irq_kms_fini(rdev); cayman_pcie_gart_fini(rdev); rdev->accel_working = false; } - if (rdev->accel_working) { - r = radeon_ib_pool_init(rdev); - if (r) { - DRM_ERROR("radeon: failed initializing IB pool (%d).\n", r); - rdev->accel_working = false; - } - r = r600_ib_test(rdev); - if (r) { - DRM_ERROR("radeon: failed testing IB (%d).\n", r); - rdev->accel_working = false; - } - } /* Don't start up if the MC ucode is missing. * The default clocks and voltages before the MC ucode @@ -1552,11 +1678,13 @@ void cayman_fini(struct radeon_device *rdev) cayman_cp_fini(rdev); r600_irq_fini(rdev); radeon_wb_fini(rdev); - radeon_ib_pool_fini(rdev); + radeon_vm_manager_fini(rdev); + r100_ib_fini(rdev); radeon_irq_kms_fini(rdev); cayman_pcie_gart_fini(rdev); r600_vram_scratch_fini(rdev); radeon_gem_fini(rdev); + radeon_semaphore_driver_fini(rdev); radeon_fence_driver_fini(rdev); radeon_bo_fini(rdev); radeon_atombios_fini(rdev); @@ -1564,3 +1692,84 @@ void cayman_fini(struct radeon_device *rdev) rdev->bios = NULL; } +/* + * vm + */ +int cayman_vm_init(struct radeon_device *rdev) +{ + /* number of VMs */ + rdev->vm_manager.nvm = 8; + /* base offset of vram pages */ + rdev->vm_manager.vram_base_offset = 0; + return 0; +} + +void cayman_vm_fini(struct radeon_device *rdev) +{ +} + +int cayman_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id) +{ + WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR + (id << 2), 0); + WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (id << 2), vm->last_pfn); + WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (id << 2), vm->pt_gpu_addr >> 12); + /* flush hdp cache */ + WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1); + /* bits 0-7 are the VM contexts0-7 */ + WREG32(VM_INVALIDATE_REQUEST, 1 << id); + return 0; +} + +void cayman_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm) +{ + WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR + (vm->id << 2), 0); + WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (vm->id << 2), 0); + WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2), 0); + /* flush hdp cache */ + WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1); + /* bits 0-7 are the VM contexts0-7 */ + WREG32(VM_INVALIDATE_REQUEST, 1 << vm->id); +} + +void cayman_vm_tlb_flush(struct radeon_device *rdev, struct radeon_vm *vm) +{ + if (vm->id == -1) + return; + + /* flush hdp cache */ + WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1); + /* bits 0-7 are the VM contexts0-7 */ + WREG32(VM_INVALIDATE_REQUEST, 1 << vm->id); +} + +#define R600_PTE_VALID (1 << 0) +#define R600_PTE_SYSTEM (1 << 1) +#define R600_PTE_SNOOPED (1 << 2) +#define R600_PTE_READABLE (1 << 5) +#define R600_PTE_WRITEABLE (1 << 6) + +uint32_t cayman_vm_page_flags(struct radeon_device *rdev, + struct radeon_vm *vm, + uint32_t flags) +{ + uint32_t r600_flags = 0; + + r600_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_PTE_VALID : 0; + r600_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0; + r600_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0; + if (flags & RADEON_VM_PAGE_SYSTEM) { + r600_flags |= R600_PTE_SYSTEM; + r600_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0; + } + return r600_flags; +} + +void cayman_vm_set_page(struct radeon_device *rdev, struct radeon_vm *vm, + unsigned pfn, uint64_t addr, uint32_t flags) +{ + void __iomem *ptr = (void *)vm->pt; + + addr = addr & 0xFFFFFFFFFFFFF000ULL; + addr |= flags; + writeq(addr, ptr + (pfn * 8)); +} diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h index 4672869cdb26..f9df2a645e79 100644 --- a/drivers/gpu/drm/radeon/nid.h +++ b/drivers/gpu/drm/radeon/nid.h @@ -42,6 +42,9 @@ #define CAYMAN_MAX_TCC_MASK 0xFF #define DMIF_ADDR_CONFIG 0xBD4 +#define SRBM_GFX_CNTL 0x0E44 +#define RINGID(x) (((x) & 0x3) << 0) +#define VMID(x) (((x) & 0x7) << 0) #define SRBM_STATUS 0x0E50 #define VM_CONTEXT0_REQUEST_RESPONSE 0x1470 @@ -219,6 +222,7 @@ #define SCRATCH_UMSK 0x8540 #define SCRATCH_ADDR 0x8544 #define CP_SEM_WAIT_TIMER 0x85BC +#define CP_COHER_CNTL2 0x85E8 #define CP_ME_CNTL 0x86D8 #define CP_ME_HALT (1 << 28) #define CP_PFP_HALT (1 << 26) @@ -394,6 +398,12 @@ #define CP_RB0_RPTR_ADDR 0xC10C #define CP_RB0_RPTR_ADDR_HI 0xC110 #define CP_RB0_WPTR 0xC114 + +#define CP_INT_CNTL 0xC124 +# define CNTX_BUSY_INT_ENABLE (1 << 19) +# define CNTX_EMPTY_INT_ENABLE (1 << 20) +# define TIME_STAMP_INT_ENABLE (1 << 26) + #define CP_RB1_BASE 0xC180 #define CP_RB1_CNTL 0xC184 #define CP_RB1_RPTR_ADDR 0xC188 @@ -411,6 +421,10 @@ #define CP_ME_RAM_DATA 0xC160 #define CP_DEBUG 0xC1FC +#define VGT_EVENT_INITIATOR 0x28a90 +# define CACHE_FLUSH_AND_INV_EVENT_TS (0x14 << 0) +# define CACHE_FLUSH_AND_INV_EVENT (0x16 << 0) + /* * PM4 */ @@ -445,6 +459,7 @@ #define PACKET3_DISPATCH_DIRECT 0x15 #define PACKET3_DISPATCH_INDIRECT 0x16 #define PACKET3_INDIRECT_BUFFER_END 0x17 +#define PACKET3_MODE_CONTROL 0x18 #define PACKET3_SET_PREDICATION 0x20 #define PACKET3_REG_RMW 0x21 #define PACKET3_COND_EXEC 0x22 @@ -494,7 +509,27 @@ #define PACKET3_ME_INITIALIZE_DEVICE_ID(x) ((x) << 16) #define PACKET3_COND_WRITE 0x45 #define PACKET3_EVENT_WRITE 0x46 +#define EVENT_TYPE(x) ((x) << 0) +#define EVENT_INDEX(x) ((x) << 8) + /* 0 - any non-TS event + * 1 - ZPASS_DONE + * 2 - SAMPLE_PIPELINESTAT + * 3 - SAMPLE_STREAMOUTSTAT* + * 4 - *S_PARTIAL_FLUSH + * 5 - TS events + */ #define PACKET3_EVENT_WRITE_EOP 0x47 +#define DATA_SEL(x) ((x) << 29) + /* 0 - discard + * 1 - send low 32bit data + * 2 - send 64bit data + * 3 - send 64bit counter value + */ +#define INT_SEL(x) ((x) << 24) + /* 0 - none + * 1 - interrupt only (DATA_SEL = 0) + * 2 - interrupt when data write is confirmed + */ #define PACKET3_EVENT_WRITE_EOS 0x48 #define PACKET3_PREAMBLE_CNTL 0x4A # define PACKET3_PREAMBLE_BEGIN_CLEAR_STATE (2 << 28) diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index bfc08f6320f8..3ec81c3d5108 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -667,7 +667,7 @@ int r100_irq_set(struct radeon_device *rdev) WREG32(R_000040_GEN_INT_CNTL, 0); return -EINVAL; } - if (rdev->irq.sw_int) { + if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) { tmp |= RADEON_SW_INT_ENABLE; } if (rdev->irq.gui_idle) { @@ -739,7 +739,7 @@ int r100_irq_process(struct radeon_device *rdev) while (status) { /* SW interrupt */ if (status & RADEON_SW_INT_TEST) { - radeon_fence_process(rdev); + radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX); } /* gui idle interrupt */ if (status & RADEON_GUI_IDLE_STAT) { @@ -811,25 +811,36 @@ u32 r100_get_vblank_counter(struct radeon_device *rdev, int crtc) void r100_fence_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence) { + struct radeon_ring *ring = &rdev->ring[fence->ring]; + /* We have to make sure that caches are flushed before * CPU might read something from VRAM. */ - radeon_ring_write(rdev, PACKET0(RADEON_RB3D_DSTCACHE_CTLSTAT, 0)); - radeon_ring_write(rdev, RADEON_RB3D_DC_FLUSH_ALL); - radeon_ring_write(rdev, PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 0)); - radeon_ring_write(rdev, RADEON_RB3D_ZC_FLUSH_ALL); + radeon_ring_write(ring, PACKET0(RADEON_RB3D_DSTCACHE_CTLSTAT, 0)); + radeon_ring_write(ring, RADEON_RB3D_DC_FLUSH_ALL); + radeon_ring_write(ring, PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 0)); + radeon_ring_write(ring, RADEON_RB3D_ZC_FLUSH_ALL); /* Wait until IDLE & CLEAN */ - radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0)); - radeon_ring_write(rdev, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN); - radeon_ring_write(rdev, PACKET0(RADEON_HOST_PATH_CNTL, 0)); - radeon_ring_write(rdev, rdev->config.r100.hdp_cntl | + radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0)); + radeon_ring_write(ring, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN); + radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0)); + radeon_ring_write(ring, rdev->config.r100.hdp_cntl | RADEON_HDP_READ_BUFFER_INVALIDATE); - radeon_ring_write(rdev, PACKET0(RADEON_HOST_PATH_CNTL, 0)); - radeon_ring_write(rdev, rdev->config.r100.hdp_cntl); + radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0)); + radeon_ring_write(ring, rdev->config.r100.hdp_cntl); /* Emit fence sequence & fire IRQ */ - radeon_ring_write(rdev, PACKET0(rdev->fence_drv.scratch_reg, 0)); - radeon_ring_write(rdev, fence->seq); - radeon_ring_write(rdev, PACKET0(RADEON_GEN_INT_STATUS, 0)); - radeon_ring_write(rdev, RADEON_SW_INT_FIRE); + radeon_ring_write(ring, PACKET0(rdev->fence_drv[fence->ring].scratch_reg, 0)); + radeon_ring_write(ring, fence->seq); + radeon_ring_write(ring, PACKET0(RADEON_GEN_INT_STATUS, 0)); + radeon_ring_write(ring, RADEON_SW_INT_FIRE); +} + +void r100_semaphore_ring_emit(struct radeon_device *rdev, + struct radeon_ring *ring, + struct radeon_semaphore *semaphore, + bool emit_wait) +{ + /* Unused on older asics, since we don't have semaphores or multiple rings */ + BUG(); } int r100_copy_blit(struct radeon_device *rdev, @@ -838,6 +849,7 @@ int r100_copy_blit(struct radeon_device *rdev, unsigned num_gpu_pages, struct radeon_fence *fence) { + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; uint32_t cur_pages; uint32_t stride_bytes = RADEON_GPU_PAGE_SIZE; uint32_t pitch; @@ -855,7 +867,7 @@ int r100_copy_blit(struct radeon_device *rdev, /* Ask for enough room for blit + flush + fence */ ndw = 64 + (10 * num_loops); - r = radeon_ring_lock(rdev, ndw); + r = radeon_ring_lock(rdev, ring, ndw); if (r) { DRM_ERROR("radeon: moving bo (%d) asking for %u dw.\n", r, ndw); return -EINVAL; @@ -869,8 +881,8 @@ int r100_copy_blit(struct radeon_device *rdev, /* pages are in Y direction - height page width in X direction - width */ - radeon_ring_write(rdev, PACKET3(PACKET3_BITBLT_MULTI, 8)); - radeon_ring_write(rdev, + radeon_ring_write(ring, PACKET3(PACKET3_BITBLT_MULTI, 8)); + radeon_ring_write(ring, RADEON_GMC_SRC_PITCH_OFFSET_CNTL | RADEON_GMC_DST_PITCH_OFFSET_CNTL | RADEON_GMC_SRC_CLIPPING | @@ -882,26 +894,26 @@ int r100_copy_blit(struct radeon_device *rdev, RADEON_DP_SRC_SOURCE_MEMORY | RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS); - radeon_ring_write(rdev, (pitch << 22) | (src_offset >> 10)); - radeon_ring_write(rdev, (pitch << 22) | (dst_offset >> 10)); - radeon_ring_write(rdev, (0x1fff) | (0x1fff << 16)); - radeon_ring_write(rdev, 0); - radeon_ring_write(rdev, (0x1fff) | (0x1fff << 16)); - radeon_ring_write(rdev, num_gpu_pages); - radeon_ring_write(rdev, num_gpu_pages); - radeon_ring_write(rdev, cur_pages | (stride_pixels << 16)); - } - radeon_ring_write(rdev, PACKET0(RADEON_DSTCACHE_CTLSTAT, 0)); - radeon_ring_write(rdev, RADEON_RB2D_DC_FLUSH_ALL); - radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0)); - radeon_ring_write(rdev, + radeon_ring_write(ring, (pitch << 22) | (src_offset >> 10)); + radeon_ring_write(ring, (pitch << 22) | (dst_offset >> 10)); + radeon_ring_write(ring, (0x1fff) | (0x1fff << 16)); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, (0x1fff) | (0x1fff << 16)); + radeon_ring_write(ring, num_gpu_pages); + radeon_ring_write(ring, num_gpu_pages); + radeon_ring_write(ring, cur_pages | (stride_pixels << 16)); + } + radeon_ring_write(ring, PACKET0(RADEON_DSTCACHE_CTLSTAT, 0)); + radeon_ring_write(ring, RADEON_RB2D_DC_FLUSH_ALL); + radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0)); + radeon_ring_write(ring, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_HOST_IDLECLEAN | RADEON_WAIT_DMA_GUI_IDLE); if (fence) { r = radeon_fence_emit(rdev, fence); } - radeon_ring_unlock_commit(rdev); + radeon_ring_unlock_commit(rdev, ring); return r; } @@ -922,19 +934,20 @@ static int r100_cp_wait_for_idle(struct radeon_device *rdev) void r100_ring_start(struct radeon_device *rdev) { + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; int r; - r = radeon_ring_lock(rdev, 2); + r = radeon_ring_lock(rdev, ring, 2); if (r) { return; } - radeon_ring_write(rdev, PACKET0(RADEON_ISYNC_CNTL, 0)); - radeon_ring_write(rdev, + radeon_ring_write(ring, PACKET0(RADEON_ISYNC_CNTL, 0)); + radeon_ring_write(ring, RADEON_ISYNC_ANY2D_IDLE3D | RADEON_ISYNC_ANY3D_IDLE2D | RADEON_ISYNC_WAIT_IDLEGUI | RADEON_ISYNC_CPSCRATCH_IDLEGUI); - radeon_ring_unlock_commit(rdev); + radeon_ring_unlock_commit(rdev, ring); } @@ -1035,6 +1048,7 @@ static void r100_cp_load_microcode(struct radeon_device *rdev) int r100_cp_init(struct radeon_device *rdev, unsigned ring_size) { + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; unsigned rb_bufsz; unsigned rb_blksz; unsigned max_fetch; @@ -1060,7 +1074,9 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size) rb_bufsz = drm_order(ring_size / 8); ring_size = (1 << (rb_bufsz + 1)) * 4; r100_cp_load_microcode(rdev); - r = radeon_ring_init(rdev, ring_size); + r = radeon_ring_init(rdev, ring, ring_size, RADEON_WB_CP_RPTR_OFFSET, + RADEON_CP_RB_RPTR, RADEON_CP_RB_WPTR, + 0, 0x7fffff, RADEON_CP_PACKET2); if (r) { return r; } @@ -1069,7 +1085,7 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size) rb_blksz = 9; /* cp will read 128bytes at a time (4 dwords) */ max_fetch = 1; - rdev->cp.align_mask = 16 - 1; + ring->align_mask = 16 - 1; /* Write to CP_RB_WPTR will be delayed for pre_write_timer clocks */ pre_write_timer = 64; /* Force CP_RB_WPTR write if written more than one time before the @@ -1099,13 +1115,13 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size) WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_NO_UPDATE); /* Set ring address */ - DRM_INFO("radeon: ring at 0x%016lX\n", (unsigned long)rdev->cp.gpu_addr); - WREG32(RADEON_CP_RB_BASE, rdev->cp.gpu_addr); + DRM_INFO("radeon: ring at 0x%016lX\n", (unsigned long)ring->gpu_addr); + WREG32(RADEON_CP_RB_BASE, ring->gpu_addr); /* Force read & write ptr to 0 */ WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA | RADEON_RB_NO_UPDATE); WREG32(RADEON_CP_RB_RPTR_WR, 0); - rdev->cp.wptr = 0; - WREG32(RADEON_CP_RB_WPTR, rdev->cp.wptr); + ring->wptr = 0; + WREG32(RADEON_CP_RB_WPTR, ring->wptr); /* set the wb address whether it's enabled or not */ WREG32(R_00070C_CP_RB_RPTR_ADDR, @@ -1121,7 +1137,7 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size) WREG32(RADEON_CP_RB_CNTL, tmp); udelay(10); - rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR); + ring->rptr = RREG32(RADEON_CP_RB_RPTR); /* Set cp mode to bus mastering & enable cp*/ WREG32(RADEON_CP_CSQ_MODE, REG_SET(RADEON_INDIRECT2_START, indirect2_start) | @@ -1130,12 +1146,12 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size) WREG32(RADEON_CP_CSQ_MODE, 0x00004D4D); WREG32(RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIBM_INDBM); radeon_ring_start(rdev); - r = radeon_ring_test(rdev); + r = radeon_ring_test(rdev, ring); if (r) { DRM_ERROR("radeon: cp isn't working (%d).\n", r); return r; } - rdev->cp.ready = true; + ring->ready = true; radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); return 0; } @@ -1147,7 +1163,7 @@ void r100_cp_fini(struct radeon_device *rdev) } /* Disable ring */ r100_cp_disable(rdev); - radeon_ring_fini(rdev); + radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]); DRM_INFO("radeon: cp finalized\n"); } @@ -1155,7 +1171,7 @@ void r100_cp_disable(struct radeon_device *rdev) { /* Disable ring */ radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); - rdev->cp.ready = false; + rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; WREG32(RADEON_CP_CSQ_MODE, 0); WREG32(RADEON_CP_CSQ_CNTL, 0); WREG32(R_000770_SCRATCH_UMSK, 0); @@ -1165,13 +1181,6 @@ void r100_cp_disable(struct radeon_device *rdev) } } -void r100_cp_commit(struct radeon_device *rdev) -{ - WREG32(RADEON_CP_RB_WPTR, rdev->cp.wptr); - (void)RREG32(RADEON_CP_RB_WPTR); -} - - /* * CS functions */ @@ -2099,9 +2108,9 @@ int r100_mc_wait_for_idle(struct radeon_device *rdev) return -1; } -void r100_gpu_lockup_update(struct r100_gpu_lockup *lockup, struct radeon_cp *cp) +void r100_gpu_lockup_update(struct r100_gpu_lockup *lockup, struct radeon_ring *ring) { - lockup->last_cp_rptr = cp->rptr; + lockup->last_cp_rptr = ring->rptr; lockup->last_jiffies = jiffies; } @@ -2126,20 +2135,20 @@ void r100_gpu_lockup_update(struct r100_gpu_lockup *lockup, struct radeon_cp *cp * false positive when CP is just gived nothing to do. * **/ -bool r100_gpu_cp_is_lockup(struct radeon_device *rdev, struct r100_gpu_lockup *lockup, struct radeon_cp *cp) +bool r100_gpu_cp_is_lockup(struct radeon_device *rdev, struct r100_gpu_lockup *lockup, struct radeon_ring *ring) { unsigned long cjiffies, elapsed; cjiffies = jiffies; if (!time_after(cjiffies, lockup->last_jiffies)) { /* likely a wrap around */ - lockup->last_cp_rptr = cp->rptr; + lockup->last_cp_rptr = ring->rptr; lockup->last_jiffies = jiffies; return false; } - if (cp->rptr != lockup->last_cp_rptr) { + if (ring->rptr != lockup->last_cp_rptr) { /* CP is still working no lockup */ - lockup->last_cp_rptr = cp->rptr; + lockup->last_cp_rptr = ring->rptr; lockup->last_jiffies = jiffies; return false; } @@ -2152,31 +2161,32 @@ bool r100_gpu_cp_is_lockup(struct radeon_device *rdev, struct r100_gpu_lockup *l return false; } -bool r100_gpu_is_lockup(struct radeon_device *rdev) +bool r100_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) { u32 rbbm_status; int r; rbbm_status = RREG32(R_000E40_RBBM_STATUS); if (!G_000E40_GUI_ACTIVE(rbbm_status)) { - r100_gpu_lockup_update(&rdev->config.r100.lockup, &rdev->cp); + r100_gpu_lockup_update(&rdev->config.r100.lockup, ring); return false; } /* force CP activities */ - r = radeon_ring_lock(rdev, 2); + r = radeon_ring_lock(rdev, ring, 2); if (!r) { /* PACKET2 NOP */ - radeon_ring_write(rdev, 0x80000000); - radeon_ring_write(rdev, 0x80000000); - radeon_ring_unlock_commit(rdev); + radeon_ring_write(ring, 0x80000000); + radeon_ring_write(ring, 0x80000000); + radeon_ring_unlock_commit(rdev, ring); } - rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR); - return r100_gpu_cp_is_lockup(rdev, &rdev->config.r100.lockup, &rdev->cp); + ring->rptr = RREG32(ring->rptr_reg); + return r100_gpu_cp_is_lockup(rdev, &rdev->config.r100.lockup, ring); } void r100_bm_disable(struct radeon_device *rdev) { u32 tmp; + u16 tmp16; /* disable bus mastering */ tmp = RREG32(R_000030_BUS_CNTL); @@ -2187,8 +2197,8 @@ void r100_bm_disable(struct radeon_device *rdev) WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000040); tmp = RREG32(RADEON_BUS_CNTL); mdelay(1); - pci_read_config_word(rdev->pdev, 0x4, (u16*)&tmp); - pci_write_config_word(rdev->pdev, 0x4, tmp & 0xFFFB); + pci_read_config_word(rdev->pdev, 0x4, &tmp16); + pci_write_config_word(rdev->pdev, 0x4, tmp16 & 0xFFFB); mdelay(1); } @@ -2579,21 +2589,22 @@ static int r100_debugfs_cp_ring_info(struct seq_file *m, void *data) struct drm_info_node *node = (struct drm_info_node *) m->private; struct drm_device *dev = node->minor->dev; struct radeon_device *rdev = dev->dev_private; + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; uint32_t rdp, wdp; unsigned count, i, j; - radeon_ring_free_size(rdev); + radeon_ring_free_size(rdev, ring); rdp = RREG32(RADEON_CP_RB_RPTR); wdp = RREG32(RADEON_CP_RB_WPTR); - count = (rdp + rdev->cp.ring_size - wdp) & rdev->cp.ptr_mask; + count = (rdp + ring->ring_size - wdp) & ring->ptr_mask; seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT)); seq_printf(m, "CP_RB_WPTR 0x%08x\n", wdp); seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp); - seq_printf(m, "%u free dwords in ring\n", rdev->cp.ring_free_dw); + seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw); seq_printf(m, "%u dwords in ring\n", count); for (j = 0; j <= count; j++) { - i = (rdp + j) & rdev->cp.ptr_mask; - seq_printf(m, "r[%04d]=0x%08x\n", i, rdev->cp.ring[i]); + i = (rdp + j) & ring->ptr_mask; + seq_printf(m, "r[%04d]=0x%08x\n", i, ring->ring[i]); } return 0; } @@ -3635,7 +3646,7 @@ void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track } } -int r100_ring_test(struct radeon_device *rdev) +int r100_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) { uint32_t scratch; uint32_t tmp = 0; @@ -3648,15 +3659,15 @@ int r100_ring_test(struct radeon_device *rdev) return r; } WREG32(scratch, 0xCAFEDEAD); - r = radeon_ring_lock(rdev, 2); + r = radeon_ring_lock(rdev, ring, 2); if (r) { DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r); radeon_scratch_free(rdev, scratch); return r; } - radeon_ring_write(rdev, PACKET0(scratch, 0)); - radeon_ring_write(rdev, 0xDEADBEEF); - radeon_ring_unlock_commit(rdev); + radeon_ring_write(ring, PACKET0(scratch, 0)); + radeon_ring_write(ring, 0xDEADBEEF); + radeon_ring_unlock_commit(rdev, ring); for (i = 0; i < rdev->usec_timeout; i++) { tmp = RREG32(scratch); if (tmp == 0xDEADBEEF) { @@ -3677,9 +3688,11 @@ int r100_ring_test(struct radeon_device *rdev) void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { - radeon_ring_write(rdev, PACKET0(RADEON_CP_IB_BASE, 1)); - radeon_ring_write(rdev, ib->gpu_addr); - radeon_ring_write(rdev, ib->length_dw); + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; + + radeon_ring_write(ring, PACKET0(RADEON_CP_IB_BASE, 1)); + radeon_ring_write(ring, ib->gpu_addr); + radeon_ring_write(ring, ib->length_dw); } int r100_ib_test(struct radeon_device *rdev) @@ -3696,7 +3709,7 @@ int r100_ib_test(struct radeon_device *rdev) return r; } WREG32(scratch, 0xCAFEDEAD); - r = radeon_ib_get(rdev, &ib); + r = radeon_ib_get(rdev, RADEON_RING_TYPE_GFX_INDEX, &ib, 256); if (r) { return r; } @@ -3740,34 +3753,16 @@ int r100_ib_test(struct radeon_device *rdev) void r100_ib_fini(struct radeon_device *rdev) { + radeon_ib_pool_suspend(rdev); radeon_ib_pool_fini(rdev); } -int r100_ib_init(struct radeon_device *rdev) -{ - int r; - - r = radeon_ib_pool_init(rdev); - if (r) { - dev_err(rdev->dev, "failed initializing IB pool (%d).\n", r); - r100_ib_fini(rdev); - return r; - } - r = r100_ib_test(rdev); - if (r) { - dev_err(rdev->dev, "failed testing IB (%d).\n", r); - r100_ib_fini(rdev); - return r; - } - return 0; -} - void r100_mc_stop(struct radeon_device *rdev, struct r100_mc_save *save) { /* Shutdown CP we shouldn't need to do that but better be safe than * sorry */ - rdev->cp.ready = false; + rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; WREG32(R_000740_CP_CSQ_CNTL, 0); /* Save few CRTC registers */ @@ -3905,6 +3900,12 @@ static int r100_startup(struct radeon_device *rdev) if (r) return r; + r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX); + if (r) { + dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); + return r; + } + /* Enable IRQ */ r100_irq_set(rdev); rdev->config.r100.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL); @@ -3914,11 +3915,18 @@ static int r100_startup(struct radeon_device *rdev) dev_err(rdev->dev, "failed initializing CP (%d).\n", r); return r; } - r = r100_ib_init(rdev); + + r = radeon_ib_pool_start(rdev); + if (r) + return r; + + r = r100_ib_test(rdev); if (r) { - dev_err(rdev->dev, "failed initializing IB (%d).\n", r); + dev_err(rdev->dev, "failed testing IB (%d).\n", r); + rdev->accel_working = false; return r; } + return 0; } @@ -3941,11 +3949,14 @@ int r100_resume(struct radeon_device *rdev) r100_clock_startup(rdev); /* Initialize surface registers */ radeon_surface_init(rdev); + + rdev->accel_working = true; return r100_startup(rdev); } int r100_suspend(struct radeon_device *rdev) { + radeon_ib_pool_suspend(rdev); r100_cp_disable(rdev); radeon_wb_disable(rdev); r100_irq_disable(rdev); @@ -4064,7 +4075,14 @@ int r100_init(struct radeon_device *rdev) return r; } r100_set_safe_registers(rdev); + + r = radeon_ib_pool_init(rdev); rdev->accel_working = true; + if (r) { + dev_err(rdev->dev, "IB initialization failed (%d).\n", r); + rdev->accel_working = false; + } + r = r100_startup(rdev); if (r) { /* Somethings want wront with the accel init stop accel */ diff --git a/drivers/gpu/drm/radeon/r200.c b/drivers/gpu/drm/radeon/r200.c index a1f3ba063c2d..eba4cbfa78f6 100644 --- a/drivers/gpu/drm/radeon/r200.c +++ b/drivers/gpu/drm/radeon/r200.c @@ -87,6 +87,7 @@ int r200_copy_dma(struct radeon_device *rdev, unsigned num_gpu_pages, struct radeon_fence *fence) { + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; uint32_t size; uint32_t cur_size; int i, num_loops; @@ -95,33 +96,33 @@ int r200_copy_dma(struct radeon_device *rdev, /* radeon pitch is /64 */ size = num_gpu_pages << RADEON_GPU_PAGE_SHIFT; num_loops = DIV_ROUND_UP(size, 0x1FFFFF); - r = radeon_ring_lock(rdev, num_loops * 4 + 64); + r = radeon_ring_lock(rdev, ring, num_loops * 4 + 64); if (r) { DRM_ERROR("radeon: moving bo (%d).\n", r); return r; } /* Must wait for 2D idle & clean before DMA or hangs might happen */ - radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0)); - radeon_ring_write(rdev, (1 << 16)); + radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0)); + radeon_ring_write(ring, (1 << 16)); for (i = 0; i < num_loops; i++) { cur_size = size; if (cur_size > 0x1FFFFF) { cur_size = 0x1FFFFF; } size -= cur_size; - radeon_ring_write(rdev, PACKET0(0x720, 2)); - radeon_ring_write(rdev, src_offset); - radeon_ring_write(rdev, dst_offset); - radeon_ring_write(rdev, cur_size | (1 << 31) | (1 << 30)); + radeon_ring_write(ring, PACKET0(0x720, 2)); + radeon_ring_write(ring, src_offset); + radeon_ring_write(ring, dst_offset); + radeon_ring_write(ring, cur_size | (1 << 31) | (1 << 30)); src_offset += cur_size; dst_offset += cur_size; } - radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0)); - radeon_ring_write(rdev, RADEON_WAIT_DMA_GUI_IDLE); + radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0)); + radeon_ring_write(ring, RADEON_WAIT_DMA_GUI_IDLE); if (fence) { r = radeon_fence_emit(rdev, fence); } - radeon_ring_unlock_commit(rdev); + radeon_ring_unlock_commit(rdev, ring); return r; } diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index c93bc64707e1..3fc0d29a5f39 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -175,37 +175,40 @@ void rv370_pcie_gart_fini(struct radeon_device *rdev) void r300_fence_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence) { + struct radeon_ring *ring = &rdev->ring[fence->ring]; + /* Who ever call radeon_fence_emit should call ring_lock and ask * for enough space (today caller are ib schedule and buffer move) */ /* Write SC register so SC & US assert idle */ - radeon_ring_write(rdev, PACKET0(R300_RE_SCISSORS_TL, 0)); - radeon_ring_write(rdev, 0); - radeon_ring_write(rdev, PACKET0(R300_RE_SCISSORS_BR, 0)); - radeon_ring_write(rdev, 0); + radeon_ring_write(ring, PACKET0(R300_RE_SCISSORS_TL, 0)); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, PACKET0(R300_RE_SCISSORS_BR, 0)); + radeon_ring_write(ring, 0); /* Flush 3D cache */ - radeon_ring_write(rdev, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); - radeon_ring_write(rdev, R300_RB3D_DC_FLUSH); - radeon_ring_write(rdev, PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0)); - radeon_ring_write(rdev, R300_ZC_FLUSH); + radeon_ring_write(ring, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); + radeon_ring_write(ring, R300_RB3D_DC_FLUSH); + radeon_ring_write(ring, PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0)); + radeon_ring_write(ring, R300_ZC_FLUSH); /* Wait until IDLE & CLEAN */ - radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0)); - radeon_ring_write(rdev, (RADEON_WAIT_3D_IDLECLEAN | + radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0)); + radeon_ring_write(ring, (RADEON_WAIT_3D_IDLECLEAN | RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_DMA_GUI_IDLE)); - radeon_ring_write(rdev, PACKET0(RADEON_HOST_PATH_CNTL, 0)); - radeon_ring_write(rdev, rdev->config.r300.hdp_cntl | + radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0)); + radeon_ring_write(ring, rdev->config.r300.hdp_cntl | RADEON_HDP_READ_BUFFER_INVALIDATE); - radeon_ring_write(rdev, PACKET0(RADEON_HOST_PATH_CNTL, 0)); - radeon_ring_write(rdev, rdev->config.r300.hdp_cntl); + radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0)); + radeon_ring_write(ring, rdev->config.r300.hdp_cntl); /* Emit fence sequence & fire IRQ */ - radeon_ring_write(rdev, PACKET0(rdev->fence_drv.scratch_reg, 0)); - radeon_ring_write(rdev, fence->seq); - radeon_ring_write(rdev, PACKET0(RADEON_GEN_INT_STATUS, 0)); - radeon_ring_write(rdev, RADEON_SW_INT_FIRE); + radeon_ring_write(ring, PACKET0(rdev->fence_drv[fence->ring].scratch_reg, 0)); + radeon_ring_write(ring, fence->seq); + radeon_ring_write(ring, PACKET0(RADEON_GEN_INT_STATUS, 0)); + radeon_ring_write(ring, RADEON_SW_INT_FIRE); } void r300_ring_start(struct radeon_device *rdev) { + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; unsigned gb_tile_config; int r; @@ -227,44 +230,44 @@ void r300_ring_start(struct radeon_device *rdev) break; } - r = radeon_ring_lock(rdev, 64); + r = radeon_ring_lock(rdev, ring, 64); if (r) { return; } - radeon_ring_write(rdev, PACKET0(RADEON_ISYNC_CNTL, 0)); - radeon_ring_write(rdev, + radeon_ring_write(ring, PACKET0(RADEON_ISYNC_CNTL, 0)); + radeon_ring_write(ring, RADEON_ISYNC_ANY2D_IDLE3D | RADEON_ISYNC_ANY3D_IDLE2D | RADEON_ISYNC_WAIT_IDLEGUI | RADEON_ISYNC_CPSCRATCH_IDLEGUI); - radeon_ring_write(rdev, PACKET0(R300_GB_TILE_CONFIG, 0)); - radeon_ring_write(rdev, gb_tile_config); - radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0)); - radeon_ring_write(rdev, + radeon_ring_write(ring, PACKET0(R300_GB_TILE_CONFIG, 0)); + radeon_ring_write(ring, gb_tile_config); + radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0)); + radeon_ring_write(ring, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN); - radeon_ring_write(rdev, PACKET0(R300_DST_PIPE_CONFIG, 0)); - radeon_ring_write(rdev, R300_PIPE_AUTO_CONFIG); - radeon_ring_write(rdev, PACKET0(R300_GB_SELECT, 0)); - radeon_ring_write(rdev, 0); - radeon_ring_write(rdev, PACKET0(R300_GB_ENABLE, 0)); - radeon_ring_write(rdev, 0); - radeon_ring_write(rdev, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); - radeon_ring_write(rdev, R300_RB3D_DC_FLUSH | R300_RB3D_DC_FREE); - radeon_ring_write(rdev, PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0)); - radeon_ring_write(rdev, R300_ZC_FLUSH | R300_ZC_FREE); - radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0)); - radeon_ring_write(rdev, + radeon_ring_write(ring, PACKET0(R300_DST_PIPE_CONFIG, 0)); + radeon_ring_write(ring, R300_PIPE_AUTO_CONFIG); + radeon_ring_write(ring, PACKET0(R300_GB_SELECT, 0)); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, PACKET0(R300_GB_ENABLE, 0)); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); + radeon_ring_write(ring, R300_RB3D_DC_FLUSH | R300_RB3D_DC_FREE); + radeon_ring_write(ring, PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0)); + radeon_ring_write(ring, R300_ZC_FLUSH | R300_ZC_FREE); + radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0)); + radeon_ring_write(ring, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN); - radeon_ring_write(rdev, PACKET0(R300_GB_AA_CONFIG, 0)); - radeon_ring_write(rdev, 0); - radeon_ring_write(rdev, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); - radeon_ring_write(rdev, R300_RB3D_DC_FLUSH | R300_RB3D_DC_FREE); - radeon_ring_write(rdev, PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0)); - radeon_ring_write(rdev, R300_ZC_FLUSH | R300_ZC_FREE); - radeon_ring_write(rdev, PACKET0(R300_GB_MSPOS0, 0)); - radeon_ring_write(rdev, + radeon_ring_write(ring, PACKET0(R300_GB_AA_CONFIG, 0)); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); + radeon_ring_write(ring, R300_RB3D_DC_FLUSH | R300_RB3D_DC_FREE); + radeon_ring_write(ring, PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0)); + radeon_ring_write(ring, R300_ZC_FLUSH | R300_ZC_FREE); + radeon_ring_write(ring, PACKET0(R300_GB_MSPOS0, 0)); + radeon_ring_write(ring, ((6 << R300_MS_X0_SHIFT) | (6 << R300_MS_Y0_SHIFT) | (6 << R300_MS_X1_SHIFT) | @@ -273,8 +276,8 @@ void r300_ring_start(struct radeon_device *rdev) (6 << R300_MS_Y2_SHIFT) | (6 << R300_MSBD0_Y_SHIFT) | (6 << R300_MSBD0_X_SHIFT))); - radeon_ring_write(rdev, PACKET0(R300_GB_MSPOS1, 0)); - radeon_ring_write(rdev, + radeon_ring_write(ring, PACKET0(R300_GB_MSPOS1, 0)); + radeon_ring_write(ring, ((6 << R300_MS_X3_SHIFT) | (6 << R300_MS_Y3_SHIFT) | (6 << R300_MS_X4_SHIFT) | @@ -282,16 +285,16 @@ void r300_ring_start(struct radeon_device *rdev) (6 << R300_MS_X5_SHIFT) | (6 << R300_MS_Y5_SHIFT) | (6 << R300_MSBD1_SHIFT))); - radeon_ring_write(rdev, PACKET0(R300_GA_ENHANCE, 0)); - radeon_ring_write(rdev, R300_GA_DEADLOCK_CNTL | R300_GA_FASTSYNC_CNTL); - radeon_ring_write(rdev, PACKET0(R300_GA_POLY_MODE, 0)); - radeon_ring_write(rdev, + radeon_ring_write(ring, PACKET0(R300_GA_ENHANCE, 0)); + radeon_ring_write(ring, R300_GA_DEADLOCK_CNTL | R300_GA_FASTSYNC_CNTL); + radeon_ring_write(ring, PACKET0(R300_GA_POLY_MODE, 0)); + radeon_ring_write(ring, R300_FRONT_PTYPE_TRIANGE | R300_BACK_PTYPE_TRIANGE); - radeon_ring_write(rdev, PACKET0(R300_GA_ROUND_MODE, 0)); - radeon_ring_write(rdev, + radeon_ring_write(ring, PACKET0(R300_GA_ROUND_MODE, 0)); + radeon_ring_write(ring, R300_GEOMETRY_ROUND_NEAREST | R300_COLOR_ROUND_NEAREST); - radeon_ring_unlock_commit(rdev); + radeon_ring_unlock_commit(rdev, ring); } void r300_errata(struct radeon_device *rdev) @@ -375,26 +378,26 @@ void r300_gpu_init(struct radeon_device *rdev) rdev->num_gb_pipes, rdev->num_z_pipes); } -bool r300_gpu_is_lockup(struct radeon_device *rdev) +bool r300_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) { u32 rbbm_status; int r; rbbm_status = RREG32(R_000E40_RBBM_STATUS); if (!G_000E40_GUI_ACTIVE(rbbm_status)) { - r100_gpu_lockup_update(&rdev->config.r300.lockup, &rdev->cp); + r100_gpu_lockup_update(&rdev->config.r300.lockup, ring); return false; } /* force CP activities */ - r = radeon_ring_lock(rdev, 2); + r = radeon_ring_lock(rdev, ring, 2); if (!r) { /* PACKET2 NOP */ - radeon_ring_write(rdev, 0x80000000); - radeon_ring_write(rdev, 0x80000000); - radeon_ring_unlock_commit(rdev); + radeon_ring_write(ring, 0x80000000); + radeon_ring_write(ring, 0x80000000); + radeon_ring_unlock_commit(rdev, ring); } - rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR); - return r100_gpu_cp_is_lockup(rdev, &rdev->config.r300.lockup, &rdev->cp); + ring->rptr = RREG32(RADEON_CP_RB_RPTR); + return r100_gpu_cp_is_lockup(rdev, &rdev->config.r300.lockup, ring); } int r300_asic_reset(struct radeon_device *rdev) @@ -701,7 +704,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p, return r; } - if (p->keep_tiling_flags) { + if (p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS) { ib[idx] = (idx_value & 31) | /* keep the 1st 5 bits */ ((idx_value & ~31) + (u32)reloc->lobj.gpu_offset); } else { @@ -765,7 +768,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p, /* RB3D_COLORPITCH1 */ /* RB3D_COLORPITCH2 */ /* RB3D_COLORPITCH3 */ - if (!p->keep_tiling_flags) { + if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { r = r100_cs_packet_next_reloc(p, &reloc); if (r) { DRM_ERROR("No reloc for ib[%d]=0x%04X\n", @@ -850,7 +853,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p, break; case 0x4F24: /* ZB_DEPTHPITCH */ - if (!p->keep_tiling_flags) { + if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { r = r100_cs_packet_next_reloc(p, &reloc); if (r) { DRM_ERROR("No reloc for ib[%d]=0x%04X\n", @@ -1396,6 +1399,12 @@ static int r300_startup(struct radeon_device *rdev) if (r) return r; + r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX); + if (r) { + dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); + return r; + } + /* Enable IRQ */ r100_irq_set(rdev); rdev->config.r300.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL); @@ -1405,11 +1414,18 @@ static int r300_startup(struct radeon_device *rdev) dev_err(rdev->dev, "failed initializing CP (%d).\n", r); return r; } - r = r100_ib_init(rdev); + + r = radeon_ib_pool_start(rdev); + if (r) + return r; + + r = r100_ib_test(rdev); if (r) { - dev_err(rdev->dev, "failed initializing IB (%d).\n", r); + dev_err(rdev->dev, "failed testing IB (%d).\n", r); + rdev->accel_working = false; return r; } + return 0; } @@ -1434,11 +1450,14 @@ int r300_resume(struct radeon_device *rdev) r300_clock_startup(rdev); /* Initialize surface registers */ radeon_surface_init(rdev); + + rdev->accel_working = true; return r300_startup(rdev); } int r300_suspend(struct radeon_device *rdev) { + radeon_ib_pool_suspend(rdev); r100_cp_disable(rdev); radeon_wb_disable(rdev); r100_irq_disable(rdev); @@ -1539,7 +1558,14 @@ int r300_init(struct radeon_device *rdev) return r; } r300_set_reg_safe(rdev); + + r = radeon_ib_pool_init(rdev); rdev->accel_working = true; + if (r) { + dev_err(rdev->dev, "IB initialization failed (%d).\n", r); + rdev->accel_working = false; + } + r = r300_startup(rdev); if (r) { /* Somethings want wront with the accel init stop accel */ diff --git a/drivers/gpu/drm/radeon/r420.c b/drivers/gpu/drm/radeon/r420.c index 417fab81812f..666e28fe509c 100644 --- a/drivers/gpu/drm/radeon/r420.c +++ b/drivers/gpu/drm/radeon/r420.c @@ -199,6 +199,8 @@ static void r420_clock_resume(struct radeon_device *rdev) static void r420_cp_errata_init(struct radeon_device *rdev) { + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; + /* RV410 and R420 can lock up if CP DMA to host memory happens * while the 2D engine is busy. * @@ -206,22 +208,24 @@ static void r420_cp_errata_init(struct radeon_device *rdev) * of the CP init, apparently. */ radeon_scratch_get(rdev, &rdev->config.r300.resync_scratch); - radeon_ring_lock(rdev, 8); - radeon_ring_write(rdev, PACKET0(R300_CP_RESYNC_ADDR, 1)); - radeon_ring_write(rdev, rdev->config.r300.resync_scratch); - radeon_ring_write(rdev, 0xDEADBEEF); - radeon_ring_unlock_commit(rdev); + radeon_ring_lock(rdev, ring, 8); + radeon_ring_write(ring, PACKET0(R300_CP_RESYNC_ADDR, 1)); + radeon_ring_write(ring, rdev->config.r300.resync_scratch); + radeon_ring_write(ring, 0xDEADBEEF); + radeon_ring_unlock_commit(rdev, ring); } static void r420_cp_errata_fini(struct radeon_device *rdev) { + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; + /* Catch the RESYNC we dispatched all the way back, * at the very beginning of the CP init. */ - radeon_ring_lock(rdev, 8); - radeon_ring_write(rdev, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); - radeon_ring_write(rdev, R300_RB3D_DC_FINISH); - radeon_ring_unlock_commit(rdev); + radeon_ring_lock(rdev, ring, 8); + radeon_ring_write(ring, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); + radeon_ring_write(ring, R300_RB3D_DC_FINISH); + radeon_ring_unlock_commit(rdev, ring); radeon_scratch_free(rdev, rdev->config.r300.resync_scratch); } @@ -254,6 +258,12 @@ static int r420_startup(struct radeon_device *rdev) if (r) return r; + r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX); + if (r) { + dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); + return r; + } + /* Enable IRQ */ r100_irq_set(rdev); rdev->config.r300.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL); @@ -264,11 +274,18 @@ static int r420_startup(struct radeon_device *rdev) return r; } r420_cp_errata_init(rdev); - r = r100_ib_init(rdev); + + r = radeon_ib_pool_start(rdev); + if (r) + return r; + + r = r100_ib_test(rdev); if (r) { - dev_err(rdev->dev, "failed initializing IB (%d).\n", r); + dev_err(rdev->dev, "failed testing IB (%d).\n", r); + rdev->accel_working = false; return r; } + return 0; } @@ -297,11 +314,14 @@ int r420_resume(struct radeon_device *rdev) r420_clock_resume(rdev); /* Initialize surface registers */ radeon_surface_init(rdev); + + rdev->accel_working = true; return r420_startup(rdev); } int r420_suspend(struct radeon_device *rdev) { + radeon_ib_pool_suspend(rdev); r420_cp_errata_fini(rdev); r100_cp_disable(rdev); radeon_wb_disable(rdev); @@ -414,7 +434,14 @@ int r420_init(struct radeon_device *rdev) return r; } r420_set_reg_safe(rdev); + + r = radeon_ib_pool_init(rdev); rdev->accel_working = true; + if (r) { + dev_err(rdev->dev, "IB initialization failed (%d).\n", r); + rdev->accel_working = false; + } + r = r420_startup(rdev); if (r) { /* Somethings want wront with the accel init stop accel */ diff --git a/drivers/gpu/drm/radeon/r500_reg.h b/drivers/gpu/drm/radeon/r500_reg.h index fc437059918f..3bd8f1b1c606 100644 --- a/drivers/gpu/drm/radeon/r500_reg.h +++ b/drivers/gpu/drm/radeon/r500_reg.h @@ -573,6 +573,7 @@ #define AVIVO_TMDSA_CNTL 0x7880 # define AVIVO_TMDSA_CNTL_ENABLE (1 << 0) +# define AVIVO_TMDSA_CNTL_HDMI_EN (1 << 2) # define AVIVO_TMDSA_CNTL_HPD_MASK (1 << 4) # define AVIVO_TMDSA_CNTL_HPD_SELECT (1 << 8) # define AVIVO_TMDSA_CNTL_SYNC_PHASE (1 << 12) @@ -633,6 +634,7 @@ #define AVIVO_LVTMA_CNTL 0x7a80 # define AVIVO_LVTMA_CNTL_ENABLE (1 << 0) +# define AVIVO_LVTMA_CNTL_HDMI_EN (1 << 2) # define AVIVO_LVTMA_CNTL_HPD_MASK (1 << 4) # define AVIVO_LVTMA_CNTL_HPD_SELECT (1 << 8) # define AVIVO_LVTMA_CNTL_SYNC_PHASE (1 << 12) diff --git a/drivers/gpu/drm/radeon/r520.c b/drivers/gpu/drm/radeon/r520.c index 3081d07f8de5..4ae1615e752f 100644 --- a/drivers/gpu/drm/radeon/r520.c +++ b/drivers/gpu/drm/radeon/r520.c @@ -187,6 +187,12 @@ static int r520_startup(struct radeon_device *rdev) if (r) return r; + r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX); + if (r) { + dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); + return r; + } + /* Enable IRQ */ rs600_irq_set(rdev); rdev->config.r300.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL); @@ -196,9 +202,15 @@ static int r520_startup(struct radeon_device *rdev) dev_err(rdev->dev, "failed initializing CP (%d).\n", r); return r; } - r = r100_ib_init(rdev); + + r = radeon_ib_pool_start(rdev); + if (r) + return r; + + r = r100_ib_test(rdev); if (r) { - dev_err(rdev->dev, "failed initializing IB (%d).\n", r); + dev_err(rdev->dev, "failed testing IB (%d).\n", r); + rdev->accel_working = false; return r; } return 0; @@ -223,6 +235,8 @@ int r520_resume(struct radeon_device *rdev) rv515_clock_startup(rdev); /* Initialize surface registers */ radeon_surface_init(rdev); + + rdev->accel_working = true; return r520_startup(rdev); } @@ -292,7 +306,14 @@ int r520_init(struct radeon_device *rdev) if (r) return r; rv515_set_safe_registers(rdev); + + r = radeon_ib_pool_init(rdev); rdev->accel_working = true; + if (r) { + dev_err(rdev->dev, "IB initialization failed (%d).\n", r); + rdev->accel_working = false; + } + r = r520_startup(rdev); if (r) { /* Somethings want wront with the accel init stop accel */ diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 9cdda0b3b081..4f08e5e6ee9d 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -1344,7 +1344,7 @@ int r600_gpu_soft_reset(struct radeon_device *rdev) return 0; } -bool r600_gpu_is_lockup(struct radeon_device *rdev) +bool r600_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) { u32 srbm_status; u32 grbm_status; @@ -1361,19 +1361,19 @@ bool r600_gpu_is_lockup(struct radeon_device *rdev) grbm_status = RREG32(R_008010_GRBM_STATUS); grbm_status2 = RREG32(R_008014_GRBM_STATUS2); if (!G_008010_GUI_ACTIVE(grbm_status)) { - r100_gpu_lockup_update(lockup, &rdev->cp); + r100_gpu_lockup_update(lockup, ring); return false; } /* force CP activities */ - r = radeon_ring_lock(rdev, 2); + r = radeon_ring_lock(rdev, ring, 2); if (!r) { /* PACKET2 NOP */ - radeon_ring_write(rdev, 0x80000000); - radeon_ring_write(rdev, 0x80000000); - radeon_ring_unlock_commit(rdev); + radeon_ring_write(ring, 0x80000000); + radeon_ring_write(ring, 0x80000000); + radeon_ring_unlock_commit(rdev, ring); } - rdev->cp.rptr = RREG32(R600_CP_RB_RPTR); - return r100_gpu_cp_is_lockup(rdev, lockup, &rdev->cp); + ring->rptr = RREG32(ring->rptr_reg); + return r100_gpu_cp_is_lockup(rdev, lockup, ring); } int r600_asic_reset(struct radeon_device *rdev) @@ -2144,27 +2144,28 @@ static int r600_cp_load_microcode(struct radeon_device *rdev) int r600_cp_start(struct radeon_device *rdev) { + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; int r; uint32_t cp_me; - r = radeon_ring_lock(rdev, 7); + r = radeon_ring_lock(rdev, ring, 7); if (r) { DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r); return r; } - radeon_ring_write(rdev, PACKET3(PACKET3_ME_INITIALIZE, 5)); - radeon_ring_write(rdev, 0x1); + radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5)); + radeon_ring_write(ring, 0x1); if (rdev->family >= CHIP_RV770) { - radeon_ring_write(rdev, 0x0); - radeon_ring_write(rdev, rdev->config.rv770.max_hw_contexts - 1); + radeon_ring_write(ring, 0x0); + radeon_ring_write(ring, rdev->config.rv770.max_hw_contexts - 1); } else { - radeon_ring_write(rdev, 0x3); - radeon_ring_write(rdev, rdev->config.r600.max_hw_contexts - 1); + radeon_ring_write(ring, 0x3); + radeon_ring_write(ring, rdev->config.r600.max_hw_contexts - 1); } - radeon_ring_write(rdev, PACKET3_ME_INITIALIZE_DEVICE_ID(1)); - radeon_ring_write(rdev, 0); - radeon_ring_write(rdev, 0); - radeon_ring_unlock_commit(rdev); + radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1)); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, 0); + radeon_ring_unlock_commit(rdev, ring); cp_me = 0xff; WREG32(R_0086D8_CP_ME_CNTL, cp_me); @@ -2173,6 +2174,7 @@ int r600_cp_start(struct radeon_device *rdev) int r600_cp_resume(struct radeon_device *rdev) { + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; u32 tmp; u32 rb_bufsz; int r; @@ -2184,13 +2186,13 @@ int r600_cp_resume(struct radeon_device *rdev) WREG32(GRBM_SOFT_RESET, 0); /* Set ring buffer size */ - rb_bufsz = drm_order(rdev->cp.ring_size / 8); + rb_bufsz = drm_order(ring->ring_size / 8); tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz; #ifdef __BIG_ENDIAN tmp |= BUF_SWAP_32BIT; #endif WREG32(CP_RB_CNTL, tmp); - WREG32(CP_SEM_WAIT_TIMER, 0x4); + WREG32(CP_SEM_WAIT_TIMER, 0x0); /* Set the write pointer delay */ WREG32(CP_RB_WPTR_DELAY, 0); @@ -2198,8 +2200,8 @@ int r600_cp_resume(struct radeon_device *rdev) /* Initialize the ring buffer's read and write pointers */ WREG32(CP_RB_CNTL, tmp | RB_RPTR_WR_ENA); WREG32(CP_RB_RPTR_WR, 0); - rdev->cp.wptr = 0; - WREG32(CP_RB_WPTR, rdev->cp.wptr); + ring->wptr = 0; + WREG32(CP_RB_WPTR, ring->wptr); /* set the wb address whether it's enabled or not */ WREG32(CP_RB_RPTR_ADDR, @@ -2217,42 +2219,36 @@ int r600_cp_resume(struct radeon_device *rdev) mdelay(1); WREG32(CP_RB_CNTL, tmp); - WREG32(CP_RB_BASE, rdev->cp.gpu_addr >> 8); + WREG32(CP_RB_BASE, ring->gpu_addr >> 8); WREG32(CP_DEBUG, (1 << 27) | (1 << 28)); - rdev->cp.rptr = RREG32(CP_RB_RPTR); + ring->rptr = RREG32(CP_RB_RPTR); r600_cp_start(rdev); - rdev->cp.ready = true; - r = radeon_ring_test(rdev); + ring->ready = true; + r = radeon_ring_test(rdev, ring); if (r) { - rdev->cp.ready = false; + ring->ready = false; return r; } return 0; } -void r600_cp_commit(struct radeon_device *rdev) -{ - WREG32(CP_RB_WPTR, rdev->cp.wptr); - (void)RREG32(CP_RB_WPTR); -} - -void r600_ring_init(struct radeon_device *rdev, unsigned ring_size) +void r600_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsigned ring_size) { u32 rb_bufsz; /* Align ring size */ rb_bufsz = drm_order(ring_size / 8); ring_size = (1 << (rb_bufsz + 1)) * 4; - rdev->cp.ring_size = ring_size; - rdev->cp.align_mask = 16 - 1; + ring->ring_size = ring_size; + ring->align_mask = 16 - 1; } void r600_cp_fini(struct radeon_device *rdev) { r600_cp_stop(rdev); - radeon_ring_fini(rdev); + radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]); } @@ -2271,11 +2267,11 @@ void r600_scratch_init(struct radeon_device *rdev) } } -int r600_ring_test(struct radeon_device *rdev) +int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) { uint32_t scratch; uint32_t tmp = 0; - unsigned i; + unsigned i, ridx = radeon_ring_index(rdev, ring); int r; r = radeon_scratch_get(rdev, &scratch); @@ -2284,16 +2280,16 @@ int r600_ring_test(struct radeon_device *rdev) return r; } WREG32(scratch, 0xCAFEDEAD); - r = radeon_ring_lock(rdev, 3); + r = radeon_ring_lock(rdev, ring, 3); if (r) { - DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r); + DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ridx, r); radeon_scratch_free(rdev, scratch); return r; } - radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1)); - radeon_ring_write(rdev, ((scratch - PACKET3_SET_CONFIG_REG_OFFSET) >> 2)); - radeon_ring_write(rdev, 0xDEADBEEF); - radeon_ring_unlock_commit(rdev); + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + radeon_ring_write(ring, ((scratch - PACKET3_SET_CONFIG_REG_OFFSET) >> 2)); + radeon_ring_write(ring, 0xDEADBEEF); + radeon_ring_unlock_commit(rdev, ring); for (i = 0; i < rdev->usec_timeout; i++) { tmp = RREG32(scratch); if (tmp == 0xDEADBEEF) @@ -2301,10 +2297,10 @@ int r600_ring_test(struct radeon_device *rdev) DRM_UDELAY(1); } if (i < rdev->usec_timeout) { - DRM_INFO("ring test succeeded in %d usecs\n", i); + DRM_INFO("ring test on %d succeeded in %d usecs\n", ridx, i); } else { - DRM_ERROR("radeon: ring test failed (scratch(0x%04X)=0x%08X)\n", - scratch, tmp); + DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n", + ridx, scratch, tmp); r = -EINVAL; } radeon_scratch_free(rdev, scratch); @@ -2314,49 +2310,63 @@ int r600_ring_test(struct radeon_device *rdev) void r600_fence_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence) { + struct radeon_ring *ring = &rdev->ring[fence->ring]; + if (rdev->wb.use_event) { - u64 addr = rdev->wb.gpu_addr + R600_WB_EVENT_OFFSET + - (u64)(rdev->fence_drv.scratch_reg - rdev->scratch.reg_base); + u64 addr = rdev->fence_drv[fence->ring].gpu_addr; /* flush read cache over gart */ - radeon_ring_write(rdev, PACKET3(PACKET3_SURFACE_SYNC, 3)); - radeon_ring_write(rdev, PACKET3_TC_ACTION_ENA | + radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3)); + radeon_ring_write(ring, PACKET3_TC_ACTION_ENA | PACKET3_VC_ACTION_ENA | PACKET3_SH_ACTION_ENA); - radeon_ring_write(rdev, 0xFFFFFFFF); - radeon_ring_write(rdev, 0); - radeon_ring_write(rdev, 10); /* poll interval */ + radeon_ring_write(ring, 0xFFFFFFFF); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, 10); /* poll interval */ /* EVENT_WRITE_EOP - flush caches, send int */ - radeon_ring_write(rdev, PACKET3(PACKET3_EVENT_WRITE_EOP, 4)); - radeon_ring_write(rdev, EVENT_TYPE(CACHE_FLUSH_AND_INV_EVENT_TS) | EVENT_INDEX(5)); - radeon_ring_write(rdev, addr & 0xffffffff); - radeon_ring_write(rdev, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2)); - radeon_ring_write(rdev, fence->seq); - radeon_ring_write(rdev, 0); + radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4)); + radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_EVENT_TS) | EVENT_INDEX(5)); + radeon_ring_write(ring, addr & 0xffffffff); + radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2)); + radeon_ring_write(ring, fence->seq); + radeon_ring_write(ring, 0); } else { /* flush read cache over gart */ - radeon_ring_write(rdev, PACKET3(PACKET3_SURFACE_SYNC, 3)); - radeon_ring_write(rdev, PACKET3_TC_ACTION_ENA | + radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3)); + radeon_ring_write(ring, PACKET3_TC_ACTION_ENA | PACKET3_VC_ACTION_ENA | PACKET3_SH_ACTION_ENA); - radeon_ring_write(rdev, 0xFFFFFFFF); - radeon_ring_write(rdev, 0); - radeon_ring_write(rdev, 10); /* poll interval */ - radeon_ring_write(rdev, PACKET3(PACKET3_EVENT_WRITE, 0)); - radeon_ring_write(rdev, EVENT_TYPE(CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0)); + radeon_ring_write(ring, 0xFFFFFFFF); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, 10); /* poll interval */ + radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0)); + radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0)); /* wait for 3D idle clean */ - radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1)); - radeon_ring_write(rdev, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2); - radeon_ring_write(rdev, WAIT_3D_IDLE_bit | WAIT_3D_IDLECLEAN_bit); + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + radeon_ring_write(ring, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2); + radeon_ring_write(ring, WAIT_3D_IDLE_bit | WAIT_3D_IDLECLEAN_bit); /* Emit fence sequence & fire IRQ */ - radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1)); - radeon_ring_write(rdev, ((rdev->fence_drv.scratch_reg - PACKET3_SET_CONFIG_REG_OFFSET) >> 2)); - radeon_ring_write(rdev, fence->seq); + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + radeon_ring_write(ring, ((rdev->fence_drv[fence->ring].scratch_reg - PACKET3_SET_CONFIG_REG_OFFSET) >> 2)); + radeon_ring_write(ring, fence->seq); /* CP_INTERRUPT packet 3 no longer exists, use packet 0 */ - radeon_ring_write(rdev, PACKET0(CP_INT_STATUS, 0)); - radeon_ring_write(rdev, RB_INT_STAT); + radeon_ring_write(ring, PACKET0(CP_INT_STATUS, 0)); + radeon_ring_write(ring, RB_INT_STAT); } } +void r600_semaphore_ring_emit(struct radeon_device *rdev, + struct radeon_ring *ring, + struct radeon_semaphore *semaphore, + bool emit_wait) +{ + uint64_t addr = semaphore->gpu_addr; + unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL; + + radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1)); + radeon_ring_write(ring, addr & 0xffffffff); + radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | sel); +} + int r600_copy_blit(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, @@ -2409,6 +2419,7 @@ void r600_clear_surface_reg(struct radeon_device *rdev, int reg) int r600_startup(struct radeon_device *rdev) { + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; int r; /* enable pcie gen2 link */ @@ -2447,6 +2458,12 @@ int r600_startup(struct radeon_device *rdev) if (r) return r; + r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX); + if (r) { + dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); + return r; + } + /* Enable IRQ */ r = r600_irq_init(rdev); if (r) { @@ -2456,7 +2473,10 @@ int r600_startup(struct radeon_device *rdev) } r600_irq_set(rdev); - r = radeon_ring_init(rdev, rdev->cp.ring_size); + r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, + R600_CP_RB_RPTR, R600_CP_RB_WPTR, + 0, 0xfffff, RADEON_CP_PACKET2); + if (r) return r; r = r600_cp_load_microcode(rdev); @@ -2466,6 +2486,17 @@ int r600_startup(struct radeon_device *rdev) if (r) return r; + r = radeon_ib_pool_start(rdev); + if (r) + return r; + + r = r600_ib_test(rdev, RADEON_RING_TYPE_GFX_INDEX); + if (r) { + DRM_ERROR("radeon: failed testing IB (%d).\n", r); + rdev->accel_working = false; + return r; + } + return 0; } @@ -2494,18 +2525,13 @@ int r600_resume(struct radeon_device *rdev) /* post card */ atom_asic_init(rdev->mode_info.atom_context); + rdev->accel_working = true; r = r600_startup(rdev); if (r) { DRM_ERROR("r600 startup failed on resume\n"); return r; } - r = r600_ib_test(rdev); - if (r) { - DRM_ERROR("radeon: failed testing IB (%d).\n", r); - return r; - } - r = r600_audio_init(rdev); if (r) { DRM_ERROR("radeon: audio resume failed\n"); @@ -2518,13 +2544,14 @@ int r600_resume(struct radeon_device *rdev) int r600_suspend(struct radeon_device *rdev) { r600_audio_fini(rdev); + radeon_ib_pool_suspend(rdev); + r600_blit_suspend(rdev); /* FIXME: we should wait for ring to be empty */ r600_cp_stop(rdev); - rdev->cp.ready = false; + rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; r600_irq_suspend(rdev); radeon_wb_disable(rdev); r600_pcie_gart_disable(rdev); - r600_blit_suspend(rdev); return 0; } @@ -2595,8 +2622,8 @@ int r600_init(struct radeon_device *rdev) if (r) return r; - rdev->cp.ring_obj = NULL; - r600_ring_init(rdev, 1024 * 1024); + rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL; + r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024); rdev->ih.ring_obj = NULL; r600_ih_ring_init(rdev, 64 * 1024); @@ -2605,30 +2632,24 @@ int r600_init(struct radeon_device *rdev) if (r) return r; + r = radeon_ib_pool_init(rdev); rdev->accel_working = true; + if (r) { + dev_err(rdev->dev, "IB initialization failed (%d).\n", r); + rdev->accel_working = false; + } + r = r600_startup(rdev); if (r) { dev_err(rdev->dev, "disabling GPU acceleration\n"); r600_cp_fini(rdev); r600_irq_fini(rdev); radeon_wb_fini(rdev); + r100_ib_fini(rdev); radeon_irq_kms_fini(rdev); r600_pcie_gart_fini(rdev); rdev->accel_working = false; } - if (rdev->accel_working) { - r = radeon_ib_pool_init(rdev); - if (r) { - dev_err(rdev->dev, "IB initialization failed (%d).\n", r); - rdev->accel_working = false; - } else { - r = r600_ib_test(rdev); - if (r) { - dev_err(rdev->dev, "IB test failed (%d).\n", r); - rdev->accel_working = false; - } - } - } r = r600_audio_init(rdev); if (r) @@ -2643,12 +2664,13 @@ void r600_fini(struct radeon_device *rdev) r600_cp_fini(rdev); r600_irq_fini(rdev); radeon_wb_fini(rdev); - radeon_ib_pool_fini(rdev); + r100_ib_fini(rdev); radeon_irq_kms_fini(rdev); r600_pcie_gart_fini(rdev); r600_vram_scratch_fini(rdev); radeon_agp_fini(rdev); radeon_gem_fini(rdev); + radeon_semaphore_driver_fini(rdev); radeon_fence_driver_fini(rdev); radeon_bo_fini(rdev); radeon_atombios_fini(rdev); @@ -2662,18 +2684,20 @@ void r600_fini(struct radeon_device *rdev) */ void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { + struct radeon_ring *ring = &rdev->ring[ib->fence->ring]; + /* FIXME: implement */ - radeon_ring_write(rdev, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); - radeon_ring_write(rdev, + radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); + radeon_ring_write(ring, #ifdef __BIG_ENDIAN (2 << 0) | #endif (ib->gpu_addr & 0xFFFFFFFC)); - radeon_ring_write(rdev, upper_32_bits(ib->gpu_addr) & 0xFF); - radeon_ring_write(rdev, ib->length_dw); + radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFF); + radeon_ring_write(ring, ib->length_dw); } -int r600_ib_test(struct radeon_device *rdev) +int r600_ib_test(struct radeon_device *rdev, int ring) { struct radeon_ib *ib; uint32_t scratch; @@ -2687,7 +2711,7 @@ int r600_ib_test(struct radeon_device *rdev) return r; } WREG32(scratch, 0xCAFEDEAD); - r = radeon_ib_get(rdev, &ib); + r = radeon_ib_get(rdev, ring, &ib, 256); if (r) { DRM_ERROR("radeon: failed to get ib (%d).\n", r); return r; @@ -2728,7 +2752,7 @@ int r600_ib_test(struct radeon_device *rdev) DRM_UDELAY(1); } if (i < rdev->usec_timeout) { - DRM_INFO("ib test succeeded in %u usecs\n", i); + DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib->fence->ring, i); } else { DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp); @@ -3075,7 +3099,7 @@ int r600_irq_set(struct radeon_device *rdev) hpd3 = RREG32(DC_HOT_PLUG_DETECT3_INT_CONTROL) & ~DC_HPDx_INT_EN; } - if (rdev->irq.sw_int) { + if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) { DRM_DEBUG("r600_irq_set: sw int\n"); cp_int_cntl |= RB_INT_ENABLE; cp_int_cntl |= TIME_STAMP_INT_ENABLE; @@ -3459,11 +3483,11 @@ restart_ih: case 177: /* CP_INT in IB1 */ case 178: /* CP_INT in IB2 */ DRM_DEBUG("IH: CP int: 0x%08x\n", src_data); - radeon_fence_process(rdev); + radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX); break; case 181: /* CP EOP event */ DRM_DEBUG("IH: CP EOP\n"); - radeon_fence_process(rdev); + radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX); break; case 233: /* GUI IDLE */ DRM_DEBUG("IH: GUI idle\n"); @@ -3496,30 +3520,6 @@ restart_ih: */ #if defined(CONFIG_DEBUG_FS) -static int r600_debugfs_cp_ring_info(struct seq_file *m, void *data) -{ - struct drm_info_node *node = (struct drm_info_node *) m->private; - struct drm_device *dev = node->minor->dev; - struct radeon_device *rdev = dev->dev_private; - unsigned count, i, j; - - radeon_ring_free_size(rdev); - count = (rdev->cp.ring_size / 4) - rdev->cp.ring_free_dw; - seq_printf(m, "CP_STAT 0x%08x\n", RREG32(CP_STAT)); - seq_printf(m, "CP_RB_WPTR 0x%08x\n", RREG32(CP_RB_WPTR)); - seq_printf(m, "CP_RB_RPTR 0x%08x\n", RREG32(CP_RB_RPTR)); - seq_printf(m, "driver's copy of the CP_RB_WPTR 0x%08x\n", rdev->cp.wptr); - seq_printf(m, "driver's copy of the CP_RB_RPTR 0x%08x\n", rdev->cp.rptr); - seq_printf(m, "%u free dwords in ring\n", rdev->cp.ring_free_dw); - seq_printf(m, "%u dwords in ring\n", count); - i = rdev->cp.rptr; - for (j = 0; j <= count; j++) { - seq_printf(m, "r[%04d]=0x%08x\n", i, rdev->cp.ring[i]); - i = (i + 1) & rdev->cp.ptr_mask; - } - return 0; -} - static int r600_debugfs_mc_info(struct seq_file *m, void *data) { struct drm_info_node *node = (struct drm_info_node *) m->private; @@ -3533,7 +3533,6 @@ static int r600_debugfs_mc_info(struct seq_file *m, void *data) static struct drm_info_list r600_mc_info_list[] = { {"r600_mc_info", r600_debugfs_mc_info, 0, NULL}, - {"r600_ring_info", r600_debugfs_cp_ring_info, 0, NULL}, }; #endif diff --git a/drivers/gpu/drm/radeon/r600_audio.c b/drivers/gpu/drm/radeon/r600_audio.c index 846fae576399..ba66f3093d46 100644 --- a/drivers/gpu/drm/radeon/r600_audio.c +++ b/drivers/gpu/drm/radeon/r600_audio.c @@ -36,7 +36,7 @@ */ static int r600_audio_chipset_supported(struct radeon_device *rdev) { - return (rdev->family >= CHIP_R600 && rdev->family < CHIP_CEDAR) + return (rdev->family >= CHIP_R600 && !ASIC_IS_DCE5(rdev)) || rdev->family == CHIP_RS600 || rdev->family == CHIP_RS690 || rdev->family == CHIP_RS740; @@ -161,8 +161,18 @@ static void r600_audio_update_hdmi(unsigned long param) */ static void r600_audio_engine_enable(struct radeon_device *rdev, bool enable) { + u32 value = 0; DRM_INFO("%s audio support\n", enable ? "Enabling" : "Disabling"); - WREG32_P(R600_AUDIO_ENABLE, enable ? 0x81000000 : 0x0, ~0x81000000); + if (ASIC_IS_DCE4(rdev)) { + if (enable) { + value |= 0x81000000; /* Required to enable audio */ + value |= 0x0e1000f0; /* fglrx sets that too */ + } + WREG32(EVERGREEN_AUDIO_ENABLE, value); + } else { + WREG32_P(R600_AUDIO_ENABLE, + enable ? 0x81000000 : 0x0, ~0x81000000); + } rdev->audio_enabled = enable; } @@ -248,22 +258,33 @@ void r600_audio_set_clock(struct drm_encoder *encoder, int clock) return; } - switch (dig->dig_encoder) { - case 0: - WREG32(R600_AUDIO_PLL1_MUL, base_rate * 50); - WREG32(R600_AUDIO_PLL1_DIV, clock * 100); - WREG32(R600_AUDIO_CLK_SRCSEL, 0); - break; - - case 1: - WREG32(R600_AUDIO_PLL2_MUL, base_rate * 50); - WREG32(R600_AUDIO_PLL2_DIV, clock * 100); - WREG32(R600_AUDIO_CLK_SRCSEL, 1); - break; - default: - dev_err(rdev->dev, "Unsupported DIG on encoder 0x%02X\n", - radeon_encoder->encoder_id); - return; + if (ASIC_IS_DCE4(rdev)) { + /* TODO: other PLLs? */ + WREG32(EVERGREEN_AUDIO_PLL1_MUL, base_rate * 10); + WREG32(EVERGREEN_AUDIO_PLL1_DIV, clock * 10); + WREG32(EVERGREEN_AUDIO_PLL1_UNK, 0x00000071); + + /* Some magic trigger or src sel? */ + WREG32_P(0x5ac, 0x01, ~0x77); + } else { + switch (dig->dig_encoder) { + case 0: + WREG32(R600_AUDIO_PLL1_MUL, base_rate * 50); + WREG32(R600_AUDIO_PLL1_DIV, clock * 100); + WREG32(R600_AUDIO_CLK_SRCSEL, 0); + break; + + case 1: + WREG32(R600_AUDIO_PLL2_MUL, base_rate * 50); + WREG32(R600_AUDIO_PLL2_DIV, clock * 100); + WREG32(R600_AUDIO_CLK_SRCSEL, 1); + break; + default: + dev_err(rdev->dev, + "Unsupported DIG on encoder 0x%02X\n", + radeon_encoder->encoder_id); + return; + } } } diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c index e09d2818f949..d996f4381130 100644 --- a/drivers/gpu/drm/radeon/r600_blit_kms.c +++ b/drivers/gpu/drm/radeon/r600_blit_kms.c @@ -50,6 +50,7 @@ static void set_render_target(struct radeon_device *rdev, int format, int w, int h, u64 gpu_addr) { + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; u32 cb_color_info; int pitch, slice; @@ -63,38 +64,38 @@ set_render_target(struct radeon_device *rdev, int format, pitch = (w / 8) - 1; slice = ((w * h) / 64) - 1; - radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); - radeon_ring_write(rdev, (CB_COLOR0_BASE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); - radeon_ring_write(rdev, gpu_addr >> 8); + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(ring, (CB_COLOR0_BASE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(ring, gpu_addr >> 8); if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770) { - radeon_ring_write(rdev, PACKET3(PACKET3_SURFACE_BASE_UPDATE, 0)); - radeon_ring_write(rdev, 2 << 0); + radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_BASE_UPDATE, 0)); + radeon_ring_write(ring, 2 << 0); } - radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); - radeon_ring_write(rdev, (CB_COLOR0_SIZE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); - radeon_ring_write(rdev, (pitch << 0) | (slice << 10)); + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(ring, (CB_COLOR0_SIZE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(ring, (pitch << 0) | (slice << 10)); - radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); - radeon_ring_write(rdev, (CB_COLOR0_VIEW - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); - radeon_ring_write(rdev, 0); + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(ring, (CB_COLOR0_VIEW - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(ring, 0); - radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); - radeon_ring_write(rdev, (CB_COLOR0_INFO - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); - radeon_ring_write(rdev, cb_color_info); + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(ring, (CB_COLOR0_INFO - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(ring, cb_color_info); - radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); - radeon_ring_write(rdev, (CB_COLOR0_TILE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); - radeon_ring_write(rdev, 0); + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(ring, (CB_COLOR0_TILE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(ring, 0); - radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); - radeon_ring_write(rdev, (CB_COLOR0_FRAG - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); - radeon_ring_write(rdev, 0); + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(ring, (CB_COLOR0_FRAG - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(ring, 0); - radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); - radeon_ring_write(rdev, (CB_COLOR0_MASK - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); - radeon_ring_write(rdev, 0); + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(ring, (CB_COLOR0_MASK - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(ring, 0); } /* emits 5dw */ @@ -103,6 +104,7 @@ cp_set_surface_sync(struct radeon_device *rdev, u32 sync_type, u32 size, u64 mc_addr) { + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; u32 cp_coher_size; if (size == 0xffffffff) @@ -110,17 +112,18 @@ cp_set_surface_sync(struct radeon_device *rdev, else cp_coher_size = ((size + 255) >> 8); - radeon_ring_write(rdev, PACKET3(PACKET3_SURFACE_SYNC, 3)); - radeon_ring_write(rdev, sync_type); - radeon_ring_write(rdev, cp_coher_size); - radeon_ring_write(rdev, mc_addr >> 8); - radeon_ring_write(rdev, 10); /* poll interval */ + radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3)); + radeon_ring_write(ring, sync_type); + radeon_ring_write(ring, cp_coher_size); + radeon_ring_write(ring, mc_addr >> 8); + radeon_ring_write(ring, 10); /* poll interval */ } /* emits 21dw + 1 surface sync = 26dw */ static void set_shaders(struct radeon_device *rdev) { + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; u64 gpu_addr; u32 sq_pgm_resources; @@ -129,35 +132,35 @@ set_shaders(struct radeon_device *rdev) /* VS */ gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset; - radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); - radeon_ring_write(rdev, (SQ_PGM_START_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); - radeon_ring_write(rdev, gpu_addr >> 8); + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(ring, (SQ_PGM_START_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(ring, gpu_addr >> 8); - radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); - radeon_ring_write(rdev, (SQ_PGM_RESOURCES_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); - radeon_ring_write(rdev, sq_pgm_resources); + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(ring, (SQ_PGM_RESOURCES_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(ring, sq_pgm_resources); - radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); - radeon_ring_write(rdev, (SQ_PGM_CF_OFFSET_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); - radeon_ring_write(rdev, 0); + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(ring, (SQ_PGM_CF_OFFSET_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(ring, 0); /* PS */ gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.ps_offset; - radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); - radeon_ring_write(rdev, (SQ_PGM_START_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); - radeon_ring_write(rdev, gpu_addr >> 8); + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(ring, (SQ_PGM_START_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(ring, gpu_addr >> 8); - radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); - radeon_ring_write(rdev, (SQ_PGM_RESOURCES_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); - radeon_ring_write(rdev, sq_pgm_resources | (1 << 28)); + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(ring, (SQ_PGM_RESOURCES_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(ring, sq_pgm_resources | (1 << 28)); - radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); - radeon_ring_write(rdev, (SQ_PGM_EXPORTS_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); - radeon_ring_write(rdev, 2); + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(ring, (SQ_PGM_EXPORTS_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(ring, 2); - radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); - radeon_ring_write(rdev, (SQ_PGM_CF_OFFSET_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); - radeon_ring_write(rdev, 0); + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(ring, (SQ_PGM_CF_OFFSET_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(ring, 0); gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset; cp_set_surface_sync(rdev, PACKET3_SH_ACTION_ENA, 512, gpu_addr); @@ -167,6 +170,7 @@ set_shaders(struct radeon_device *rdev) static void set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr) { + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; u32 sq_vtx_constant_word2; sq_vtx_constant_word2 = SQ_VTXC_BASE_ADDR_HI(upper_32_bits(gpu_addr) & 0xff) | @@ -175,15 +179,15 @@ set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr) sq_vtx_constant_word2 |= SQ_VTXC_ENDIAN_SWAP(SQ_ENDIAN_8IN32); #endif - radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 7)); - radeon_ring_write(rdev, 0x460); - radeon_ring_write(rdev, gpu_addr & 0xffffffff); - radeon_ring_write(rdev, 48 - 1); - radeon_ring_write(rdev, sq_vtx_constant_word2); - radeon_ring_write(rdev, 1 << 0); - radeon_ring_write(rdev, 0); - radeon_ring_write(rdev, 0); - radeon_ring_write(rdev, SQ_TEX_VTX_VALID_BUFFER << 30); + radeon_ring_write(ring, PACKET3(PACKET3_SET_RESOURCE, 7)); + radeon_ring_write(ring, 0x460); + radeon_ring_write(ring, gpu_addr & 0xffffffff); + radeon_ring_write(ring, 48 - 1); + radeon_ring_write(ring, sq_vtx_constant_word2); + radeon_ring_write(ring, 1 << 0); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, SQ_TEX_VTX_VALID_BUFFER << 30); if ((rdev->family == CHIP_RV610) || (rdev->family == CHIP_RV620) || @@ -203,6 +207,7 @@ set_tex_resource(struct radeon_device *rdev, int format, int w, int h, int pitch, u64 gpu_addr, u32 size) { + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4; if (h < 1) @@ -225,15 +230,15 @@ set_tex_resource(struct radeon_device *rdev, cp_set_surface_sync(rdev, PACKET3_TC_ACTION_ENA, size, gpu_addr); - radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 7)); - radeon_ring_write(rdev, 0); - radeon_ring_write(rdev, sq_tex_resource_word0); - radeon_ring_write(rdev, sq_tex_resource_word1); - radeon_ring_write(rdev, gpu_addr >> 8); - radeon_ring_write(rdev, gpu_addr >> 8); - radeon_ring_write(rdev, sq_tex_resource_word4); - radeon_ring_write(rdev, 0); - radeon_ring_write(rdev, SQ_TEX_VTX_VALID_TEXTURE << 30); + radeon_ring_write(ring, PACKET3(PACKET3_SET_RESOURCE, 7)); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, sq_tex_resource_word0); + radeon_ring_write(ring, sq_tex_resource_word1); + radeon_ring_write(ring, gpu_addr >> 8); + radeon_ring_write(ring, gpu_addr >> 8); + radeon_ring_write(ring, sq_tex_resource_word4); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, SQ_TEX_VTX_VALID_TEXTURE << 30); } /* emits 12 */ @@ -241,43 +246,45 @@ static void set_scissors(struct radeon_device *rdev, int x1, int y1, int x2, int y2) { - radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); - radeon_ring_write(rdev, (PA_SC_SCREEN_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); - radeon_ring_write(rdev, (x1 << 0) | (y1 << 16)); - radeon_ring_write(rdev, (x2 << 0) | (y2 << 16)); - - radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); - radeon_ring_write(rdev, (PA_SC_GENERIC_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); - radeon_ring_write(rdev, (x1 << 0) | (y1 << 16) | (1 << 31)); - radeon_ring_write(rdev, (x2 << 0) | (y2 << 16)); - - radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); - radeon_ring_write(rdev, (PA_SC_WINDOW_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); - radeon_ring_write(rdev, (x1 << 0) | (y1 << 16) | (1 << 31)); - radeon_ring_write(rdev, (x2 << 0) | (y2 << 16)); + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); + radeon_ring_write(ring, (PA_SC_SCREEN_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(ring, (x1 << 0) | (y1 << 16)); + radeon_ring_write(ring, (x2 << 0) | (y2 << 16)); + + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); + radeon_ring_write(ring, (PA_SC_GENERIC_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(ring, (x1 << 0) | (y1 << 16) | (1 << 31)); + radeon_ring_write(ring, (x2 << 0) | (y2 << 16)); + + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); + radeon_ring_write(ring, (PA_SC_WINDOW_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(ring, (x1 << 0) | (y1 << 16) | (1 << 31)); + radeon_ring_write(ring, (x2 << 0) | (y2 << 16)); } /* emits 10 */ static void draw_auto(struct radeon_device *rdev) { - radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1)); - radeon_ring_write(rdev, (VGT_PRIMITIVE_TYPE - PACKET3_SET_CONFIG_REG_OFFSET) >> 2); - radeon_ring_write(rdev, DI_PT_RECTLIST); + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + radeon_ring_write(ring, (VGT_PRIMITIVE_TYPE - PACKET3_SET_CONFIG_REG_OFFSET) >> 2); + radeon_ring_write(ring, DI_PT_RECTLIST); - radeon_ring_write(rdev, PACKET3(PACKET3_INDEX_TYPE, 0)); - radeon_ring_write(rdev, + radeon_ring_write(ring, PACKET3(PACKET3_INDEX_TYPE, 0)); + radeon_ring_write(ring, #ifdef __BIG_ENDIAN (2 << 2) | #endif DI_INDEX_SIZE_16_BIT); - radeon_ring_write(rdev, PACKET3(PACKET3_NUM_INSTANCES, 0)); - radeon_ring_write(rdev, 1); + radeon_ring_write(ring, PACKET3(PACKET3_NUM_INSTANCES, 0)); + radeon_ring_write(ring, 1); - radeon_ring_write(rdev, PACKET3(PACKET3_DRAW_INDEX_AUTO, 1)); - radeon_ring_write(rdev, 3); - radeon_ring_write(rdev, DI_SRC_SEL_AUTO_INDEX); + radeon_ring_write(ring, PACKET3(PACKET3_DRAW_INDEX_AUTO, 1)); + radeon_ring_write(ring, 3); + radeon_ring_write(ring, DI_SRC_SEL_AUTO_INDEX); } @@ -285,6 +292,7 @@ draw_auto(struct radeon_device *rdev) static void set_default_state(struct radeon_device *rdev) { + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2; u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2; int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs; @@ -440,24 +448,24 @@ set_default_state(struct radeon_device *rdev) /* emit an IB pointing at default state */ dwords = ALIGN(rdev->r600_blit.state_len, 0x10); gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.state_offset; - radeon_ring_write(rdev, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); - radeon_ring_write(rdev, + radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); + radeon_ring_write(ring, #ifdef __BIG_ENDIAN (2 << 0) | #endif (gpu_addr & 0xFFFFFFFC)); - radeon_ring_write(rdev, upper_32_bits(gpu_addr) & 0xFF); - radeon_ring_write(rdev, dwords); + radeon_ring_write(ring, upper_32_bits(gpu_addr) & 0xFF); + radeon_ring_write(ring, dwords); /* SQ config */ - radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 6)); - radeon_ring_write(rdev, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_OFFSET) >> 2); - radeon_ring_write(rdev, sq_config); - radeon_ring_write(rdev, sq_gpr_resource_mgmt_1); - radeon_ring_write(rdev, sq_gpr_resource_mgmt_2); - radeon_ring_write(rdev, sq_thread_resource_mgmt); - radeon_ring_write(rdev, sq_stack_resource_mgmt_1); - radeon_ring_write(rdev, sq_stack_resource_mgmt_2); + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 6)); + radeon_ring_write(ring, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_OFFSET) >> 2); + radeon_ring_write(ring, sq_config); + radeon_ring_write(ring, sq_gpr_resource_mgmt_1); + radeon_ring_write(ring, sq_gpr_resource_mgmt_2); + radeon_ring_write(ring, sq_thread_resource_mgmt); + radeon_ring_write(ring, sq_stack_resource_mgmt_1); + radeon_ring_write(ring, sq_stack_resource_mgmt_2); } static uint32_t i2f(uint32_t input) @@ -611,16 +619,17 @@ void r600_blit_fini(struct radeon_device *rdev) radeon_bo_unref(&rdev->r600_blit.shader_obj); } -static int r600_vb_ib_get(struct radeon_device *rdev) +static int r600_vb_ib_get(struct radeon_device *rdev, unsigned size) { int r; - r = radeon_ib_get(rdev, &rdev->r600_blit.vb_ib); + r = radeon_ib_get(rdev, RADEON_RING_TYPE_GFX_INDEX, + &rdev->r600_blit.vb_ib, size); if (r) { DRM_ERROR("failed to get IB for vertex buffer\n"); return r; } - rdev->r600_blit.vb_total = 64*1024; + rdev->r600_blit.vb_total = size; rdev->r600_blit.vb_used = 0; return 0; } @@ -679,15 +688,12 @@ static unsigned r600_blit_create_rect(unsigned num_gpu_pages, int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages) { + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; int r; int ring_size; int num_loops = 0; int dwords_per_loop = rdev->r600_blit.ring_size_per_loop; - r = r600_vb_ib_get(rdev); - if (r) - return r; - /* num loops */ while (num_gpu_pages) { num_gpu_pages -= @@ -696,10 +702,15 @@ int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages) num_loops++; } + /* 48 bytes for vertex per loop */ + r = r600_vb_ib_get(rdev, (num_loops*48)+256); + if (r) + return r; + /* calculate number of loops correctly */ ring_size = num_loops * dwords_per_loop; ring_size += rdev->r600_blit.ring_size_common; - r = radeon_ring_lock(rdev, ring_size); + r = radeon_ring_lock(rdev, ring, ring_size); if (r) return r; @@ -718,7 +729,7 @@ void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence) if (fence) r = radeon_fence_emit(rdev, fence); - radeon_ring_unlock_commit(rdev); + radeon_ring_unlock_commit(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]); } void r600_kms_blit_copy(struct radeon_device *rdev, diff --git a/drivers/gpu/drm/radeon/r600_cp.c b/drivers/gpu/drm/radeon/r600_cp.c index c9db4931913f..84c546250955 100644 --- a/drivers/gpu/drm/radeon/r600_cp.c +++ b/drivers/gpu/drm/radeon/r600_cp.c @@ -1815,7 +1815,7 @@ static void r600_cp_init_ring_buffer(struct drm_device *dev, dev_priv->ring.size_l2qw); #endif - RADEON_WRITE(R600_CP_SEM_WAIT_TIMER, 0x4); + RADEON_WRITE(R600_CP_SEM_WAIT_TIMER, 0x0); /* Set the write pointer delay */ RADEON_WRITE(R600_CP_RB_WPTR_DELAY, 0); diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c index cb1acffd2430..38ce5d0427e3 100644 --- a/drivers/gpu/drm/radeon/r600_cs.c +++ b/drivers/gpu/drm/radeon/r600_cs.c @@ -941,7 +941,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) track->db_depth_control = radeon_get_ib_value(p, idx); break; case R_028010_DB_DEPTH_INFO: - if (!p->keep_tiling_flags && + if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS) && r600_cs_packet_next_is_pkt3_nop(p)) { r = r600_cs_packet_next_reloc(p, &reloc); if (r) { @@ -993,7 +993,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case R_0280B4_CB_COLOR5_INFO: case R_0280B8_CB_COLOR6_INFO: case R_0280BC_CB_COLOR7_INFO: - if (!p->keep_tiling_flags && + if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS) && r600_cs_packet_next_is_pkt3_nop(p)) { r = r600_cs_packet_next_reloc(p, &reloc); if (r) { @@ -1293,7 +1293,7 @@ static int r600_check_texture_resource(struct radeon_cs_parser *p, u32 idx, mip_offset <<= 8; word0 = radeon_get_ib_value(p, idx + 0); - if (!p->keep_tiling_flags) { + if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { if (tiling_flags & RADEON_TILING_MACRO) word0 |= S_038000_TILE_MODE(V_038000_ARRAY_2D_TILED_THIN1); else if (tiling_flags & RADEON_TILING_MICRO) @@ -1625,7 +1625,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, return -EINVAL; } base_offset = (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); - if (!p->keep_tiling_flags) { + if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) ib[idx+1+(i*7)+0] |= S_038000_TILE_MODE(V_038000_ARRAY_2D_TILED_THIN1); else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) diff --git a/drivers/gpu/drm/radeon/r600_hdmi.c b/drivers/gpu/drm/radeon/r600_hdmi.c index f5ac7e788d81..0b5920671450 100644 --- a/drivers/gpu/drm/radeon/r600_hdmi.c +++ b/drivers/gpu/drm/radeon/r600_hdmi.c @@ -196,6 +196,13 @@ static void r600_hdmi_videoinfoframe( frame[0xD] = (right_bar >> 8); r600_hdmi_infoframe_checksum(0x82, 0x02, 0x0D, frame); + /* Our header values (type, version, length) should be alright, Intel + * is using the same. Checksum function also seems to be OK, it works + * fine for audio infoframe. However calculated value is always lower + * by 2 in comparison to fglrx. It breaks displaying anything in case + * of TVs that strictly check the checksum. Hack it manually here to + * workaround this issue. */ + frame[0x0] += 2; WREG32(offset+R600_HDMI_VIDEOINFOFRAME_0, frame[0x0] | (frame[0x1] << 8) | (frame[0x2] << 16) | (frame[0x3] << 24)); @@ -313,7 +320,7 @@ void r600_hdmi_setmode(struct drm_encoder *encoder, struct drm_display_mode *mod struct radeon_device *rdev = dev->dev_private; uint32_t offset = to_radeon_encoder(encoder)->hdmi_offset; - if (ASIC_IS_DCE4(rdev)) + if (ASIC_IS_DCE5(rdev)) return; if (!offset) @@ -455,13 +462,31 @@ static void r600_hdmi_assign_block(struct drm_encoder *encoder) struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; + u16 eg_offsets[] = { + EVERGREEN_CRTC0_REGISTER_OFFSET, + EVERGREEN_CRTC1_REGISTER_OFFSET, + EVERGREEN_CRTC2_REGISTER_OFFSET, + EVERGREEN_CRTC3_REGISTER_OFFSET, + EVERGREEN_CRTC4_REGISTER_OFFSET, + EVERGREEN_CRTC5_REGISTER_OFFSET, + }; + if (!dig) { dev_err(rdev->dev, "Enabling HDMI on non-dig encoder\n"); return; } - if (ASIC_IS_DCE4(rdev)) { + if (ASIC_IS_DCE5(rdev)) { /* TODO */ + } else if (ASIC_IS_DCE4(rdev)) { + if (dig->dig_encoder >= ARRAY_SIZE(eg_offsets)) { + dev_err(rdev->dev, "Enabling HDMI on unknown dig\n"); + return; + } + radeon_encoder->hdmi_offset = EVERGREEN_HDMI_BASE + + eg_offsets[dig->dig_encoder]; + radeon_encoder->hdmi_config_offset = radeon_encoder->hdmi_offset + + EVERGREEN_HDMI_CONFIG_OFFSET; } else if (ASIC_IS_DCE3(rdev)) { radeon_encoder->hdmi_offset = dig->dig_encoder ? R600_HDMI_BLOCK3 : R600_HDMI_BLOCK1; @@ -484,7 +509,7 @@ void r600_hdmi_enable(struct drm_encoder *encoder) struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); uint32_t offset; - if (ASIC_IS_DCE4(rdev)) + if (ASIC_IS_DCE5(rdev)) return; if (!radeon_encoder->hdmi_offset) { @@ -497,16 +522,24 @@ void r600_hdmi_enable(struct drm_encoder *encoder) } offset = radeon_encoder->hdmi_offset; - if (ASIC_IS_DCE32(rdev) && !ASIC_IS_DCE4(rdev)) { + if (ASIC_IS_DCE5(rdev)) { + /* TODO */ + } else if (ASIC_IS_DCE4(rdev)) { + WREG32_P(radeon_encoder->hdmi_config_offset + 0xc, 0x1, ~0x1); + } else if (ASIC_IS_DCE32(rdev)) { WREG32_P(radeon_encoder->hdmi_config_offset + 0x4, 0x1, ~0x1); - } else if (rdev->family >= CHIP_R600 && !ASIC_IS_DCE3(rdev)) { + } else if (ASIC_IS_DCE3(rdev)) { + /* TODO */ + } else if (rdev->family >= CHIP_R600) { switch (radeon_encoder->encoder_id) { case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_TMDS1: - WREG32_P(AVIVO_TMDSA_CNTL, 0x4, ~0x4); + WREG32_P(AVIVO_TMDSA_CNTL, AVIVO_TMDSA_CNTL_HDMI_EN, + ~AVIVO_TMDSA_CNTL_HDMI_EN); WREG32(offset + R600_HDMI_ENABLE, 0x101); break; case ENCODER_OBJECT_ID_INTERNAL_LVTM1: - WREG32_P(AVIVO_LVTMA_CNTL, 0x4, ~0x4); + WREG32_P(AVIVO_LVTMA_CNTL, AVIVO_LVTMA_CNTL_HDMI_EN, + ~AVIVO_LVTMA_CNTL_HDMI_EN); WREG32(offset + R600_HDMI_ENABLE, 0x105); break; default: @@ -518,8 +551,8 @@ void r600_hdmi_enable(struct drm_encoder *encoder) if (rdev->irq.installed && rdev->family != CHIP_RS600 && rdev->family != CHIP_RS690 - && rdev->family != CHIP_RS740) { - + && rdev->family != CHIP_RS740 + && !ASIC_IS_DCE4(rdev)) { /* if irq is available use it */ rdev->irq.hdmi[offset == R600_HDMI_BLOCK1 ? 0 : 1] = true; radeon_irq_set(rdev); @@ -544,7 +577,7 @@ void r600_hdmi_disable(struct drm_encoder *encoder) struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); uint32_t offset; - if (ASIC_IS_DCE4(rdev)) + if (ASIC_IS_DCE5(rdev)) return; offset = radeon_encoder->hdmi_offset; @@ -563,16 +596,22 @@ void r600_hdmi_disable(struct drm_encoder *encoder) /* disable polling */ r600_audio_disable_polling(encoder); - if (ASIC_IS_DCE32(rdev) && !ASIC_IS_DCE4(rdev)) { + if (ASIC_IS_DCE5(rdev)) { + /* TODO */ + } else if (ASIC_IS_DCE4(rdev)) { + WREG32_P(radeon_encoder->hdmi_config_offset + 0xc, 0, ~0x1); + } else if (ASIC_IS_DCE32(rdev)) { WREG32_P(radeon_encoder->hdmi_config_offset + 0x4, 0, ~0x1); } else if (rdev->family >= CHIP_R600 && !ASIC_IS_DCE3(rdev)) { switch (radeon_encoder->encoder_id) { case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_TMDS1: - WREG32_P(AVIVO_TMDSA_CNTL, 0, ~0x4); + WREG32_P(AVIVO_TMDSA_CNTL, 0, + ~AVIVO_TMDSA_CNTL_HDMI_EN); WREG32(offset + R600_HDMI_ENABLE, 0); break; case ENCODER_OBJECT_ID_INTERNAL_LVTM1: - WREG32_P(AVIVO_LVTMA_CNTL, 0, ~0x4); + WREG32_P(AVIVO_LVTMA_CNTL, 0, + ~AVIVO_LVTMA_CNTL_HDMI_EN); WREG32(offset + R600_HDMI_ENABLE, 0); break; default: diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h index bfe1b5d92afe..3ee1fd7ef394 100644 --- a/drivers/gpu/drm/radeon/r600d.h +++ b/drivers/gpu/drm/radeon/r600d.h @@ -831,6 +831,8 @@ #define PACKET3_STRMOUT_BUFFER_UPDATE 0x34 #define PACKET3_INDIRECT_BUFFER_MP 0x38 #define PACKET3_MEM_SEMAPHORE 0x39 +# define PACKET3_SEM_SEL_SIGNAL (0x6 << 29) +# define PACKET3_SEM_SEL_WAIT (0x7 << 29) #define PACKET3_MPEG_INDEX 0x3A #define PACKET3_WAIT_REG_MEM 0x3C #define PACKET3_MEM_WRITE 0x3D diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 8227e76b5c70..73e05cb85eca 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -107,6 +107,21 @@ extern int radeon_msi; #define RADEONFB_CONN_LIMIT 4 #define RADEON_BIOS_NUM_SCRATCH 8 +/* max number of rings */ +#define RADEON_NUM_RINGS 3 + +/* internal ring indices */ +/* r1xx+ has gfx CP ring */ +#define RADEON_RING_TYPE_GFX_INDEX 0 + +/* cayman has 2 compute CP rings */ +#define CAYMAN_RING_TYPE_CP1_INDEX 1 +#define CAYMAN_RING_TYPE_CP2_INDEX 2 + +/* hardcode those limit for now */ +#define RADEON_VA_RESERVED_SIZE (8 << 20) +#define RADEON_IB_VM_MAX_SIZE (64 << 10) + /* * Errata workarounds. */ @@ -192,14 +207,15 @@ extern int sumo_get_temp(struct radeon_device *rdev); */ struct radeon_fence_driver { uint32_t scratch_reg; + uint64_t gpu_addr; + volatile uint32_t *cpu_addr; atomic_t seq; uint32_t last_seq; unsigned long last_jiffies; unsigned long last_timeout; wait_queue_head_t queue; - rwlock_t lock; struct list_head created; - struct list_head emited; + struct list_head emitted; struct list_head signaled; bool initialized; }; @@ -210,21 +226,26 @@ struct radeon_fence { struct list_head list; /* protected by radeon_fence.lock */ uint32_t seq; - bool emited; + bool emitted; bool signaled; + /* RB, DMA, etc. */ + int ring; + struct radeon_semaphore *semaphore; }; +int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring); int radeon_fence_driver_init(struct radeon_device *rdev); void radeon_fence_driver_fini(struct radeon_device *rdev); -int radeon_fence_create(struct radeon_device *rdev, struct radeon_fence **fence); +int radeon_fence_create(struct radeon_device *rdev, struct radeon_fence **fence, int ring); int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence); -void radeon_fence_process(struct radeon_device *rdev); +void radeon_fence_process(struct radeon_device *rdev, int ring); bool radeon_fence_signaled(struct radeon_fence *fence); int radeon_fence_wait(struct radeon_fence *fence, bool interruptible); -int radeon_fence_wait_next(struct radeon_device *rdev); -int radeon_fence_wait_last(struct radeon_device *rdev); +int radeon_fence_wait_next(struct radeon_device *rdev, int ring); +int radeon_fence_wait_last(struct radeon_device *rdev, int ring); struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence); void radeon_fence_unref(struct radeon_fence **fence); +int radeon_fence_count_emitted(struct radeon_device *rdev, int ring); /* * Tiling registers @@ -246,6 +267,21 @@ struct radeon_mman { bool initialized; }; +/* bo virtual address in a specific vm */ +struct radeon_bo_va { + /* bo list is protected by bo being reserved */ + struct list_head bo_list; + /* vm list is protected by vm mutex */ + struct list_head vm_list; + /* constant after initialization */ + struct radeon_vm *vm; + struct radeon_bo *bo; + uint64_t soffset; + uint64_t eoffset; + uint32_t flags; + bool valid; +}; + struct radeon_bo { /* Protected by gem.mutex */ struct list_head list; @@ -259,6 +295,10 @@ struct radeon_bo { u32 tiling_flags; u32 pitch; int surface_reg; + /* list of all virtual address to which this bo + * is associated to + */ + struct list_head va; /* Constant after initialization */ struct radeon_device *rdev; struct drm_gem_object gem_base; @@ -274,6 +314,48 @@ struct radeon_bo_list { u32 tiling_flags; }; +/* sub-allocation manager, it has to be protected by another lock. + * By conception this is an helper for other part of the driver + * like the indirect buffer or semaphore, which both have their + * locking. + * + * Principe is simple, we keep a list of sub allocation in offset + * order (first entry has offset == 0, last entry has the highest + * offset). + * + * When allocating new object we first check if there is room at + * the end total_size - (last_object_offset + last_object_size) >= + * alloc_size. If so we allocate new object there. + * + * When there is not enough room at the end, we start waiting for + * each sub object until we reach object_offset+object_size >= + * alloc_size, this object then become the sub object we return. + * + * Alignment can't be bigger than page size. + * + * Hole are not considered for allocation to keep things simple. + * Assumption is that there won't be hole (all object on same + * alignment). + */ +struct radeon_sa_manager { + struct radeon_bo *bo; + struct list_head sa_bo; + unsigned size; + uint64_t gpu_addr; + void *cpu_ptr; + uint32_t domain; +}; + +struct radeon_sa_bo; + +/* sub-allocation buffer */ +struct radeon_sa_bo { + struct list_head list; + struct radeon_sa_manager *manager; + unsigned offset; + unsigned size; +}; + /* * GEM objects. */ @@ -303,6 +385,46 @@ int radeon_mode_dumb_destroy(struct drm_file *file_priv, uint32_t handle); /* + * Semaphores. + */ +struct radeon_ring; + +#define RADEON_SEMAPHORE_BO_SIZE 256 + +struct radeon_semaphore_driver { + rwlock_t lock; + struct list_head bo; +}; + +struct radeon_semaphore_bo; + +/* everything here is constant */ +struct radeon_semaphore { + struct list_head list; + uint64_t gpu_addr; + uint32_t *cpu_ptr; + struct radeon_semaphore_bo *bo; +}; + +struct radeon_semaphore_bo { + struct list_head list; + struct radeon_ib *ib; + struct list_head free; + struct radeon_semaphore semaphores[RADEON_SEMAPHORE_BO_SIZE/8]; + unsigned nused; +}; + +void radeon_semaphore_driver_fini(struct radeon_device *rdev); +int radeon_semaphore_create(struct radeon_device *rdev, + struct radeon_semaphore **semaphore); +void radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring, + struct radeon_semaphore *semaphore); +void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring, + struct radeon_semaphore *semaphore); +void radeon_semaphore_free(struct radeon_device *rdev, + struct radeon_semaphore *semaphore); + +/* * GART structures, functions & helpers */ struct radeon_mc; @@ -310,6 +432,7 @@ struct radeon_mc; #define RADEON_GPU_PAGE_SIZE 4096 #define RADEON_GPU_PAGE_MASK (RADEON_GPU_PAGE_SIZE - 1) #define RADEON_GPU_PAGE_SHIFT 12 +#define RADEON_GPU_PAGE_ALIGN(a) (((a) + RADEON_GPU_PAGE_MASK) & ~RADEON_GPU_PAGE_MASK) struct radeon_gart { dma_addr_t table_addr; @@ -320,7 +443,6 @@ struct radeon_gart { unsigned table_size; struct page **pages; dma_addr_t *pages_addr; - bool *ttm_alloced; bool ready; }; @@ -434,7 +556,7 @@ union radeon_irq_stat_regs { struct radeon_irq { bool installed; - bool sw_int; + bool sw_int[RADEON_NUM_RINGS]; bool crtc_vblank_int[RADEON_MAX_CRTCS]; bool pflip[RADEON_MAX_CRTCS]; wait_queue_head_t vblank_queue; @@ -444,7 +566,7 @@ struct radeon_irq { wait_queue_head_t idle_queue; bool hdmi[RADEON_MAX_HDMI_BLOCKS]; spinlock_t sw_lock; - int sw_refcount; + int sw_refcount[RADEON_NUM_RINGS]; union radeon_irq_stat_regs stat_regs; spinlock_t pflip_lock[RADEON_MAX_CRTCS]; int pflip_refcount[RADEON_MAX_CRTCS]; @@ -452,22 +574,23 @@ struct radeon_irq { int radeon_irq_kms_init(struct radeon_device *rdev); void radeon_irq_kms_fini(struct radeon_device *rdev); -void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev); -void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev); +void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring); +void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev, int ring); void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc); void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc); /* - * CP & ring. + * CP & rings. */ + struct radeon_ib { - struct list_head list; + struct radeon_sa_bo sa_bo; unsigned idx; + uint32_t length_dw; uint64_t gpu_addr; - struct radeon_fence *fence; uint32_t *ptr; - uint32_t length_dw; - bool free; + struct radeon_fence *fence; + unsigned vm_id; }; /* @@ -475,20 +598,22 @@ struct radeon_ib { * mutex protects scheduled_ibs, ready, alloc_bm */ struct radeon_ib_pool { - struct mutex mutex; - struct radeon_bo *robj; - struct list_head bogus_ib; - struct radeon_ib ibs[RADEON_IB_POOL_SIZE]; - bool ready; - unsigned head_id; + struct mutex mutex; + struct radeon_sa_manager sa_manager; + struct radeon_ib ibs[RADEON_IB_POOL_SIZE]; + bool ready; + unsigned head_id; }; -struct radeon_cp { +struct radeon_ring { struct radeon_bo *ring_obj; volatile uint32_t *ring; unsigned rptr; + unsigned rptr_offs; + unsigned rptr_reg; unsigned wptr; unsigned wptr_old; + unsigned wptr_reg; unsigned ring_size; unsigned ring_free_dw; int count_dw; @@ -497,6 +622,61 @@ struct radeon_cp { uint32_t ptr_mask; struct mutex mutex; bool ready; + u32 ptr_reg_shift; + u32 ptr_reg_mask; + u32 nop; +}; + +/* + * VM + */ +struct radeon_vm { + struct list_head list; + struct list_head va; + int id; + unsigned last_pfn; + u64 pt_gpu_addr; + u64 *pt; + struct radeon_sa_bo sa_bo; + struct mutex mutex; + /* last fence for cs using this vm */ + struct radeon_fence *fence; +}; + +struct radeon_vm_funcs { + int (*init)(struct radeon_device *rdev); + void (*fini)(struct radeon_device *rdev); + /* cs mutex must be lock for schedule_ib */ + int (*bind)(struct radeon_device *rdev, struct radeon_vm *vm, int id); + void (*unbind)(struct radeon_device *rdev, struct radeon_vm *vm); + void (*tlb_flush)(struct radeon_device *rdev, struct radeon_vm *vm); + uint32_t (*page_flags)(struct radeon_device *rdev, + struct radeon_vm *vm, + uint32_t flags); + void (*set_page)(struct radeon_device *rdev, struct radeon_vm *vm, + unsigned pfn, uint64_t addr, uint32_t flags); +}; + +struct radeon_vm_manager { + struct list_head lru_vm; + uint32_t use_bitmap; + struct radeon_sa_manager sa_manager; + uint32_t max_pfn; + /* fields constant after init */ + const struct radeon_vm_funcs *funcs; + /* number of VMIDs */ + unsigned nvm; + /* vram base address for page table entry */ + u64 vram_base_offset; + /* is vm enabled? */ + bool enabled; +}; + +/* + * file private structure + */ +struct radeon_fpriv { + struct radeon_vm vm; }; /* @@ -506,6 +686,7 @@ struct r600_ih { struct radeon_bo *ring_obj; volatile uint32_t *ring; unsigned rptr; + unsigned rptr_offs; unsigned wptr; unsigned wptr_old; unsigned ring_size; @@ -549,23 +730,29 @@ struct r600_blit { void r600_blit_suspend(struct radeon_device *rdev); -int radeon_ib_get(struct radeon_device *rdev, struct radeon_ib **ib); +int radeon_ib_get(struct radeon_device *rdev, int ring, + struct radeon_ib **ib, unsigned size); void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib); +bool radeon_ib_try_free(struct radeon_device *rdev, struct radeon_ib *ib); int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib); int radeon_ib_pool_init(struct radeon_device *rdev); void radeon_ib_pool_fini(struct radeon_device *rdev); +int radeon_ib_pool_start(struct radeon_device *rdev); +int radeon_ib_pool_suspend(struct radeon_device *rdev); int radeon_ib_test(struct radeon_device *rdev); -extern void radeon_ib_bogus_add(struct radeon_device *rdev, struct radeon_ib *ib); /* Ring access between begin & end cannot sleep */ -void radeon_ring_free_size(struct radeon_device *rdev); -int radeon_ring_alloc(struct radeon_device *rdev, unsigned ndw); -int radeon_ring_lock(struct radeon_device *rdev, unsigned ndw); -void radeon_ring_commit(struct radeon_device *rdev); -void radeon_ring_unlock_commit(struct radeon_device *rdev); -void radeon_ring_unlock_undo(struct radeon_device *rdev); -int radeon_ring_test(struct radeon_device *rdev); -int radeon_ring_init(struct radeon_device *rdev, unsigned ring_size); -void radeon_ring_fini(struct radeon_device *rdev); +int radeon_ring_index(struct radeon_device *rdev, struct radeon_ring *cp); +void radeon_ring_free_size(struct radeon_device *rdev, struct radeon_ring *cp); +int radeon_ring_alloc(struct radeon_device *rdev, struct radeon_ring *cp, unsigned ndw); +int radeon_ring_lock(struct radeon_device *rdev, struct radeon_ring *cp, unsigned ndw); +void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *cp); +void radeon_ring_unlock_commit(struct radeon_device *rdev, struct radeon_ring *cp); +void radeon_ring_unlock_undo(struct radeon_device *rdev, struct radeon_ring *cp); +int radeon_ring_test(struct radeon_device *rdev, struct radeon_ring *cp); +int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *cp, unsigned ring_size, + unsigned rptr_offs, unsigned rptr_reg, unsigned wptr_reg, + u32 ptr_reg_shift, u32 ptr_reg_mask, u32 nop); +void radeon_ring_fini(struct radeon_device *rdev, struct radeon_ring *cp); /* @@ -582,12 +769,12 @@ struct radeon_cs_reloc { struct radeon_cs_chunk { uint32_t chunk_id; uint32_t length_dw; - int kpage_idx[2]; - uint32_t *kpage[2]; + int kpage_idx[2]; + uint32_t *kpage[2]; uint32_t *kdata; - void __user *user_ptr; - int last_copied_page; - int last_page_index; + void __user *user_ptr; + int last_copied_page; + int last_page_index; }; struct radeon_cs_parser { @@ -605,14 +792,18 @@ struct radeon_cs_parser { struct radeon_cs_reloc *relocs; struct radeon_cs_reloc **relocs_ptr; struct list_head validated; + bool sync_to_ring[RADEON_NUM_RINGS]; /* indices of various chunks */ int chunk_ib_idx; int chunk_relocs_idx; + int chunk_flags_idx; struct radeon_ib *ib; void *track; unsigned family; int parser_error; - bool keep_tiling_flags; + u32 cs_flags; + u32 ring; + s32 priority; }; extern int radeon_cs_update_pages(struct radeon_cs_parser *p, int pg_idx); @@ -869,11 +1060,20 @@ void radeon_benchmark(struct radeon_device *rdev, int test_number); * Testing */ void radeon_test_moves(struct radeon_device *rdev); +void radeon_test_ring_sync(struct radeon_device *rdev, + struct radeon_ring *cpA, + struct radeon_ring *cpB); +void radeon_test_syncing(struct radeon_device *rdev); /* * Debugfs */ +struct radeon_debugfs { + struct drm_info_list *files; + unsigned num_files; +}; + int radeon_debugfs_add_files(struct radeon_device *rdev, struct drm_info_list *files, unsigned nfiles); @@ -889,21 +1089,27 @@ struct radeon_asic { int (*resume)(struct radeon_device *rdev); int (*suspend)(struct radeon_device *rdev); void (*vga_set_state)(struct radeon_device *rdev, bool state); - bool (*gpu_is_lockup)(struct radeon_device *rdev); + bool (*gpu_is_lockup)(struct radeon_device *rdev, struct radeon_ring *cp); int (*asic_reset)(struct radeon_device *rdev); void (*gart_tlb_flush)(struct radeon_device *rdev); int (*gart_set_page)(struct radeon_device *rdev, int i, uint64_t addr); int (*cp_init)(struct radeon_device *rdev, unsigned ring_size); void (*cp_fini)(struct radeon_device *rdev); void (*cp_disable)(struct radeon_device *rdev); - void (*cp_commit)(struct radeon_device *rdev); void (*ring_start)(struct radeon_device *rdev); - int (*ring_test)(struct radeon_device *rdev); - void (*ring_ib_execute)(struct radeon_device *rdev, struct radeon_ib *ib); + + struct { + void (*ib_execute)(struct radeon_device *rdev, struct radeon_ib *ib); + int (*ib_parse)(struct radeon_device *rdev, struct radeon_ib *ib); + void (*emit_fence)(struct radeon_device *rdev, struct radeon_fence *fence); + void (*emit_semaphore)(struct radeon_device *rdev, struct radeon_ring *cp, + struct radeon_semaphore *semaphore, bool emit_wait); + } ring[RADEON_NUM_RINGS]; + + int (*ring_test)(struct radeon_device *rdev, struct radeon_ring *cp); int (*irq_set)(struct radeon_device *rdev); int (*irq_process)(struct radeon_device *rdev); u32 (*get_vblank_counter)(struct radeon_device *rdev, int crtc); - void (*fence_ring_emit)(struct radeon_device *rdev, struct radeon_fence *fence); int (*cs_parse)(struct radeon_cs_parser *p); int (*copy_blit)(struct radeon_device *rdev, uint64_t src_offset, @@ -1132,6 +1338,8 @@ int radeon_gem_busy_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +int radeon_gem_va_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int radeon_gem_set_tiling_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); @@ -1231,11 +1439,10 @@ struct radeon_device { struct radeon_mode_info mode_info; struct radeon_scratch scratch; struct radeon_mman mman; - struct radeon_fence_driver fence_drv; - struct radeon_cp cp; - /* cayman compute rings */ - struct radeon_cp cp1; - struct radeon_cp cp2; + rwlock_t fence_lock; + struct radeon_fence_driver fence_drv[RADEON_NUM_RINGS]; + struct radeon_semaphore_driver semaphore_drv; + struct radeon_ring ring[RADEON_NUM_RINGS]; struct radeon_ib_pool ib_pool; struct radeon_irq irq; struct radeon_asic *asic; @@ -1279,6 +1486,13 @@ struct radeon_device { struct drm_file *cmask_filp; /* i2c buses */ struct radeon_i2c_chan *i2c_bus[RADEON_MAX_I2C_BUS]; + /* debugfs */ + struct radeon_debugfs debugfs[RADEON_DEBUGFS_MAX_COMPONENTS]; + unsigned debugfs_count; + /* virtual memory */ + struct radeon_vm_manager vm_manager; + /* ring used for bo copies */ + u32 copy_ring; }; int radeon_device_init(struct radeon_device *rdev, @@ -1414,18 +1628,17 @@ void radeon_atombios_fini(struct radeon_device *rdev); /* * RING helpers. */ - #if DRM_DEBUG_CODE == 0 -static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v) +static inline void radeon_ring_write(struct radeon_ring *ring, uint32_t v) { - rdev->cp.ring[rdev->cp.wptr++] = v; - rdev->cp.wptr &= rdev->cp.ptr_mask; - rdev->cp.count_dw--; - rdev->cp.ring_free_dw--; + ring->ring[ring->wptr++] = v; + ring->wptr &= ring->ptr_mask; + ring->count_dw--; + ring->ring_free_dw--; } #else /* With debugging this is just too big to inline */ -void radeon_ring_write(struct radeon_device *rdev, uint32_t v); +void radeon_ring_write(struct radeon_ring *ring, uint32_t v); #endif /* @@ -1437,18 +1650,19 @@ void radeon_ring_write(struct radeon_device *rdev, uint32_t v); #define radeon_suspend(rdev) (rdev)->asic->suspend((rdev)) #define radeon_cs_parse(p) rdev->asic->cs_parse((p)) #define radeon_vga_set_state(rdev, state) (rdev)->asic->vga_set_state((rdev), (state)) -#define radeon_gpu_is_lockup(rdev) (rdev)->asic->gpu_is_lockup((rdev)) +#define radeon_gpu_is_lockup(rdev, cp) (rdev)->asic->gpu_is_lockup((rdev), (cp)) #define radeon_asic_reset(rdev) (rdev)->asic->asic_reset((rdev)) #define radeon_gart_tlb_flush(rdev) (rdev)->asic->gart_tlb_flush((rdev)) #define radeon_gart_set_page(rdev, i, p) (rdev)->asic->gart_set_page((rdev), (i), (p)) -#define radeon_cp_commit(rdev) (rdev)->asic->cp_commit((rdev)) #define radeon_ring_start(rdev) (rdev)->asic->ring_start((rdev)) -#define radeon_ring_test(rdev) (rdev)->asic->ring_test((rdev)) -#define radeon_ring_ib_execute(rdev, ib) (rdev)->asic->ring_ib_execute((rdev), (ib)) +#define radeon_ring_test(rdev, cp) (rdev)->asic->ring_test((rdev), (cp)) +#define radeon_ring_ib_execute(rdev, r, ib) (rdev)->asic->ring[(r)].ib_execute((rdev), (ib)) +#define radeon_ring_ib_parse(rdev, r, ib) (rdev)->asic->ring[(r)].ib_parse((rdev), (ib)) #define radeon_irq_set(rdev) (rdev)->asic->irq_set((rdev)) #define radeon_irq_process(rdev) (rdev)->asic->irq_process((rdev)) #define radeon_get_vblank_counter(rdev, crtc) (rdev)->asic->get_vblank_counter((rdev), (crtc)) -#define radeon_fence_ring_emit(rdev, fence) (rdev)->asic->fence_ring_emit((rdev), (fence)) +#define radeon_fence_ring_emit(rdev, r, fence) (rdev)->asic->ring[(r)].emit_fence((rdev), (fence)) +#define radeon_semaphore_ring_emit(rdev, r, cp, semaphore, emit_wait) (rdev)->asic->ring[(r)].emit_semaphore((rdev), (cp), (semaphore), (emit_wait)) #define radeon_copy_blit(rdev, s, d, np, f) (rdev)->asic->copy_blit((rdev), (s), (d), (np), (f)) #define radeon_copy_dma(rdev, s, d, np, f) (rdev)->asic->copy_dma((rdev), (s), (d), (np), (f)) #define radeon_copy(rdev, s, d, np, f) (rdev)->asic->copy((rdev), (s), (d), (np), (f)) @@ -1503,6 +1717,33 @@ extern int radeon_suspend_kms(struct drm_device *dev, pm_message_t state); extern void radeon_ttm_set_active_vram_size(struct radeon_device *rdev, u64 size); /* + * vm + */ +int radeon_vm_manager_init(struct radeon_device *rdev); +void radeon_vm_manager_fini(struct radeon_device *rdev); +int radeon_vm_manager_start(struct radeon_device *rdev); +int radeon_vm_manager_suspend(struct radeon_device *rdev); +int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm); +void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm); +int radeon_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm); +void radeon_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm); +int radeon_vm_bo_update_pte(struct radeon_device *rdev, + struct radeon_vm *vm, + struct radeon_bo *bo, + struct ttm_mem_reg *mem); +void radeon_vm_bo_invalidate(struct radeon_device *rdev, + struct radeon_bo *bo); +int radeon_vm_bo_add(struct radeon_device *rdev, + struct radeon_vm *vm, + struct radeon_bo *bo, + uint64_t offset, + uint32_t flags); +int radeon_vm_bo_rmv(struct radeon_device *rdev, + struct radeon_vm *vm, + struct radeon_bo *bo); + + +/* * R600 vram scratch functions */ int r600_vram_scratch_init(struct radeon_device *rdev); diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index a2e1eae114ef..36a6192ce862 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -138,14 +138,18 @@ static struct radeon_asic r100_asic = { .asic_reset = &r100_asic_reset, .gart_tlb_flush = &r100_pci_gart_tlb_flush, .gart_set_page = &r100_pci_gart_set_page, - .cp_commit = &r100_cp_commit, .ring_start = &r100_ring_start, .ring_test = &r100_ring_test, - .ring_ib_execute = &r100_ring_ib_execute, + .ring = { + [RADEON_RING_TYPE_GFX_INDEX] = { + .ib_execute = &r100_ring_ib_execute, + .emit_fence = &r100_fence_ring_emit, + .emit_semaphore = &r100_semaphore_ring_emit, + } + }, .irq_set = &r100_irq_set, .irq_process = &r100_irq_process, .get_vblank_counter = &r100_get_vblank_counter, - .fence_ring_emit = &r100_fence_ring_emit, .cs_parse = &r100_cs_parse, .copy_blit = &r100_copy_blit, .copy_dma = NULL, @@ -186,14 +190,18 @@ static struct radeon_asic r200_asic = { .asic_reset = &r100_asic_reset, .gart_tlb_flush = &r100_pci_gart_tlb_flush, .gart_set_page = &r100_pci_gart_set_page, - .cp_commit = &r100_cp_commit, .ring_start = &r100_ring_start, .ring_test = &r100_ring_test, - .ring_ib_execute = &r100_ring_ib_execute, + .ring = { + [RADEON_RING_TYPE_GFX_INDEX] = { + .ib_execute = &r100_ring_ib_execute, + .emit_fence = &r100_fence_ring_emit, + .emit_semaphore = &r100_semaphore_ring_emit, + } + }, .irq_set = &r100_irq_set, .irq_process = &r100_irq_process, .get_vblank_counter = &r100_get_vblank_counter, - .fence_ring_emit = &r100_fence_ring_emit, .cs_parse = &r100_cs_parse, .copy_blit = &r100_copy_blit, .copy_dma = &r200_copy_dma, @@ -233,14 +241,18 @@ static struct radeon_asic r300_asic = { .asic_reset = &r300_asic_reset, .gart_tlb_flush = &r100_pci_gart_tlb_flush, .gart_set_page = &r100_pci_gart_set_page, - .cp_commit = &r100_cp_commit, .ring_start = &r300_ring_start, .ring_test = &r100_ring_test, - .ring_ib_execute = &r100_ring_ib_execute, + .ring = { + [RADEON_RING_TYPE_GFX_INDEX] = { + .ib_execute = &r100_ring_ib_execute, + .emit_fence = &r300_fence_ring_emit, + .emit_semaphore = &r100_semaphore_ring_emit, + } + }, .irq_set = &r100_irq_set, .irq_process = &r100_irq_process, .get_vblank_counter = &r100_get_vblank_counter, - .fence_ring_emit = &r300_fence_ring_emit, .cs_parse = &r300_cs_parse, .copy_blit = &r100_copy_blit, .copy_dma = &r200_copy_dma, @@ -281,14 +293,18 @@ static struct radeon_asic r300_asic_pcie = { .asic_reset = &r300_asic_reset, .gart_tlb_flush = &rv370_pcie_gart_tlb_flush, .gart_set_page = &rv370_pcie_gart_set_page, - .cp_commit = &r100_cp_commit, .ring_start = &r300_ring_start, .ring_test = &r100_ring_test, - .ring_ib_execute = &r100_ring_ib_execute, + .ring = { + [RADEON_RING_TYPE_GFX_INDEX] = { + .ib_execute = &r100_ring_ib_execute, + .emit_fence = &r300_fence_ring_emit, + .emit_semaphore = &r100_semaphore_ring_emit, + } + }, .irq_set = &r100_irq_set, .irq_process = &r100_irq_process, .get_vblank_counter = &r100_get_vblank_counter, - .fence_ring_emit = &r300_fence_ring_emit, .cs_parse = &r300_cs_parse, .copy_blit = &r100_copy_blit, .copy_dma = &r200_copy_dma, @@ -328,14 +344,18 @@ static struct radeon_asic r420_asic = { .asic_reset = &r300_asic_reset, .gart_tlb_flush = &rv370_pcie_gart_tlb_flush, .gart_set_page = &rv370_pcie_gart_set_page, - .cp_commit = &r100_cp_commit, .ring_start = &r300_ring_start, .ring_test = &r100_ring_test, - .ring_ib_execute = &r100_ring_ib_execute, + .ring = { + [RADEON_RING_TYPE_GFX_INDEX] = { + .ib_execute = &r100_ring_ib_execute, + .emit_fence = &r300_fence_ring_emit, + .emit_semaphore = &r100_semaphore_ring_emit, + } + }, .irq_set = &r100_irq_set, .irq_process = &r100_irq_process, .get_vblank_counter = &r100_get_vblank_counter, - .fence_ring_emit = &r300_fence_ring_emit, .cs_parse = &r300_cs_parse, .copy_blit = &r100_copy_blit, .copy_dma = &r200_copy_dma, @@ -376,14 +396,18 @@ static struct radeon_asic rs400_asic = { .asic_reset = &r300_asic_reset, .gart_tlb_flush = &rs400_gart_tlb_flush, .gart_set_page = &rs400_gart_set_page, - .cp_commit = &r100_cp_commit, .ring_start = &r300_ring_start, .ring_test = &r100_ring_test, - .ring_ib_execute = &r100_ring_ib_execute, + .ring = { + [RADEON_RING_TYPE_GFX_INDEX] = { + .ib_execute = &r100_ring_ib_execute, + .emit_fence = &r300_fence_ring_emit, + .emit_semaphore = &r100_semaphore_ring_emit, + } + }, .irq_set = &r100_irq_set, .irq_process = &r100_irq_process, .get_vblank_counter = &r100_get_vblank_counter, - .fence_ring_emit = &r300_fence_ring_emit, .cs_parse = &r300_cs_parse, .copy_blit = &r100_copy_blit, .copy_dma = &r200_copy_dma, @@ -424,14 +448,18 @@ static struct radeon_asic rs600_asic = { .asic_reset = &rs600_asic_reset, .gart_tlb_flush = &rs600_gart_tlb_flush, .gart_set_page = &rs600_gart_set_page, - .cp_commit = &r100_cp_commit, .ring_start = &r300_ring_start, .ring_test = &r100_ring_test, - .ring_ib_execute = &r100_ring_ib_execute, + .ring = { + [RADEON_RING_TYPE_GFX_INDEX] = { + .ib_execute = &r100_ring_ib_execute, + .emit_fence = &r300_fence_ring_emit, + .emit_semaphore = &r100_semaphore_ring_emit, + } + }, .irq_set = &rs600_irq_set, .irq_process = &rs600_irq_process, .get_vblank_counter = &rs600_get_vblank_counter, - .fence_ring_emit = &r300_fence_ring_emit, .cs_parse = &r300_cs_parse, .copy_blit = &r100_copy_blit, .copy_dma = &r200_copy_dma, @@ -472,14 +500,18 @@ static struct radeon_asic rs690_asic = { .asic_reset = &rs600_asic_reset, .gart_tlb_flush = &rs400_gart_tlb_flush, .gart_set_page = &rs400_gart_set_page, - .cp_commit = &r100_cp_commit, .ring_start = &r300_ring_start, .ring_test = &r100_ring_test, - .ring_ib_execute = &r100_ring_ib_execute, + .ring = { + [RADEON_RING_TYPE_GFX_INDEX] = { + .ib_execute = &r100_ring_ib_execute, + .emit_fence = &r300_fence_ring_emit, + .emit_semaphore = &r100_semaphore_ring_emit, + } + }, .irq_set = &rs600_irq_set, .irq_process = &rs600_irq_process, .get_vblank_counter = &rs600_get_vblank_counter, - .fence_ring_emit = &r300_fence_ring_emit, .cs_parse = &r300_cs_parse, .copy_blit = &r100_copy_blit, .copy_dma = &r200_copy_dma, @@ -520,14 +552,18 @@ static struct radeon_asic rv515_asic = { .asic_reset = &rs600_asic_reset, .gart_tlb_flush = &rv370_pcie_gart_tlb_flush, .gart_set_page = &rv370_pcie_gart_set_page, - .cp_commit = &r100_cp_commit, .ring_start = &rv515_ring_start, .ring_test = &r100_ring_test, - .ring_ib_execute = &r100_ring_ib_execute, + .ring = { + [RADEON_RING_TYPE_GFX_INDEX] = { + .ib_execute = &r100_ring_ib_execute, + .emit_fence = &r300_fence_ring_emit, + .emit_semaphore = &r100_semaphore_ring_emit, + } + }, .irq_set = &rs600_irq_set, .irq_process = &rs600_irq_process, .get_vblank_counter = &rs600_get_vblank_counter, - .fence_ring_emit = &r300_fence_ring_emit, .cs_parse = &r300_cs_parse, .copy_blit = &r100_copy_blit, .copy_dma = &r200_copy_dma, @@ -568,14 +604,18 @@ static struct radeon_asic r520_asic = { .asic_reset = &rs600_asic_reset, .gart_tlb_flush = &rv370_pcie_gart_tlb_flush, .gart_set_page = &rv370_pcie_gart_set_page, - .cp_commit = &r100_cp_commit, .ring_start = &rv515_ring_start, .ring_test = &r100_ring_test, - .ring_ib_execute = &r100_ring_ib_execute, + .ring = { + [RADEON_RING_TYPE_GFX_INDEX] = { + .ib_execute = &r100_ring_ib_execute, + .emit_fence = &r300_fence_ring_emit, + .emit_semaphore = &r100_semaphore_ring_emit, + } + }, .irq_set = &rs600_irq_set, .irq_process = &rs600_irq_process, .get_vblank_counter = &rs600_get_vblank_counter, - .fence_ring_emit = &r300_fence_ring_emit, .cs_parse = &r300_cs_parse, .copy_blit = &r100_copy_blit, .copy_dma = &r200_copy_dma, @@ -611,18 +651,22 @@ static struct radeon_asic r600_asic = { .fini = &r600_fini, .suspend = &r600_suspend, .resume = &r600_resume, - .cp_commit = &r600_cp_commit, .vga_set_state = &r600_vga_set_state, .gpu_is_lockup = &r600_gpu_is_lockup, .asic_reset = &r600_asic_reset, .gart_tlb_flush = &r600_pcie_gart_tlb_flush, .gart_set_page = &rs600_gart_set_page, .ring_test = &r600_ring_test, - .ring_ib_execute = &r600_ring_ib_execute, + .ring = { + [RADEON_RING_TYPE_GFX_INDEX] = { + .ib_execute = &r600_ring_ib_execute, + .emit_fence = &r600_fence_ring_emit, + .emit_semaphore = &r600_semaphore_ring_emit, + } + }, .irq_set = &r600_irq_set, .irq_process = &r600_irq_process, .get_vblank_counter = &rs600_get_vblank_counter, - .fence_ring_emit = &r600_fence_ring_emit, .cs_parse = &r600_cs_parse, .copy_blit = &r600_copy_blit, .copy_dma = NULL, @@ -658,18 +702,22 @@ static struct radeon_asic rs780_asic = { .fini = &r600_fini, .suspend = &r600_suspend, .resume = &r600_resume, - .cp_commit = &r600_cp_commit, .gpu_is_lockup = &r600_gpu_is_lockup, .vga_set_state = &r600_vga_set_state, .asic_reset = &r600_asic_reset, .gart_tlb_flush = &r600_pcie_gart_tlb_flush, .gart_set_page = &rs600_gart_set_page, .ring_test = &r600_ring_test, - .ring_ib_execute = &r600_ring_ib_execute, + .ring = { + [RADEON_RING_TYPE_GFX_INDEX] = { + .ib_execute = &r600_ring_ib_execute, + .emit_fence = &r600_fence_ring_emit, + .emit_semaphore = &r600_semaphore_ring_emit, + } + }, .irq_set = &r600_irq_set, .irq_process = &r600_irq_process, .get_vblank_counter = &rs600_get_vblank_counter, - .fence_ring_emit = &r600_fence_ring_emit, .cs_parse = &r600_cs_parse, .copy_blit = &r600_copy_blit, .copy_dma = NULL, @@ -705,18 +753,22 @@ static struct radeon_asic rv770_asic = { .fini = &rv770_fini, .suspend = &rv770_suspend, .resume = &rv770_resume, - .cp_commit = &r600_cp_commit, .asic_reset = &r600_asic_reset, .gpu_is_lockup = &r600_gpu_is_lockup, .vga_set_state = &r600_vga_set_state, .gart_tlb_flush = &r600_pcie_gart_tlb_flush, .gart_set_page = &rs600_gart_set_page, .ring_test = &r600_ring_test, - .ring_ib_execute = &r600_ring_ib_execute, + .ring = { + [RADEON_RING_TYPE_GFX_INDEX] = { + .ib_execute = &r600_ring_ib_execute, + .emit_fence = &r600_fence_ring_emit, + .emit_semaphore = &r600_semaphore_ring_emit, + } + }, .irq_set = &r600_irq_set, .irq_process = &r600_irq_process, .get_vblank_counter = &rs600_get_vblank_counter, - .fence_ring_emit = &r600_fence_ring_emit, .cs_parse = &r600_cs_parse, .copy_blit = &r600_copy_blit, .copy_dma = NULL, @@ -752,18 +804,22 @@ static struct radeon_asic evergreen_asic = { .fini = &evergreen_fini, .suspend = &evergreen_suspend, .resume = &evergreen_resume, - .cp_commit = &r600_cp_commit, .gpu_is_lockup = &evergreen_gpu_is_lockup, .asic_reset = &evergreen_asic_reset, .vga_set_state = &r600_vga_set_state, .gart_tlb_flush = &evergreen_pcie_gart_tlb_flush, .gart_set_page = &rs600_gart_set_page, .ring_test = &r600_ring_test, - .ring_ib_execute = &evergreen_ring_ib_execute, + .ring = { + [RADEON_RING_TYPE_GFX_INDEX] = { + .ib_execute = &evergreen_ring_ib_execute, + .emit_fence = &r600_fence_ring_emit, + .emit_semaphore = &r600_semaphore_ring_emit, + } + }, .irq_set = &evergreen_irq_set, .irq_process = &evergreen_irq_process, .get_vblank_counter = &evergreen_get_vblank_counter, - .fence_ring_emit = &r600_fence_ring_emit, .cs_parse = &evergreen_cs_parse, .copy_blit = &r600_copy_blit, .copy_dma = NULL, @@ -799,18 +855,22 @@ static struct radeon_asic sumo_asic = { .fini = &evergreen_fini, .suspend = &evergreen_suspend, .resume = &evergreen_resume, - .cp_commit = &r600_cp_commit, .gpu_is_lockup = &evergreen_gpu_is_lockup, .asic_reset = &evergreen_asic_reset, .vga_set_state = &r600_vga_set_state, .gart_tlb_flush = &evergreen_pcie_gart_tlb_flush, .gart_set_page = &rs600_gart_set_page, .ring_test = &r600_ring_test, - .ring_ib_execute = &evergreen_ring_ib_execute, + .ring = { + [RADEON_RING_TYPE_GFX_INDEX] = { + .ib_execute = &evergreen_ring_ib_execute, + .emit_fence = &r600_fence_ring_emit, + .emit_semaphore = &r600_semaphore_ring_emit, + } + }, .irq_set = &evergreen_irq_set, .irq_process = &evergreen_irq_process, .get_vblank_counter = &evergreen_get_vblank_counter, - .fence_ring_emit = &r600_fence_ring_emit, .cs_parse = &evergreen_cs_parse, .copy_blit = &r600_copy_blit, .copy_dma = NULL, @@ -846,18 +906,22 @@ static struct radeon_asic btc_asic = { .fini = &evergreen_fini, .suspend = &evergreen_suspend, .resume = &evergreen_resume, - .cp_commit = &r600_cp_commit, .gpu_is_lockup = &evergreen_gpu_is_lockup, .asic_reset = &evergreen_asic_reset, .vga_set_state = &r600_vga_set_state, .gart_tlb_flush = &evergreen_pcie_gart_tlb_flush, .gart_set_page = &rs600_gart_set_page, .ring_test = &r600_ring_test, - .ring_ib_execute = &evergreen_ring_ib_execute, + .ring = { + [RADEON_RING_TYPE_GFX_INDEX] = { + .ib_execute = &evergreen_ring_ib_execute, + .emit_fence = &r600_fence_ring_emit, + .emit_semaphore = &r600_semaphore_ring_emit, + } + }, .irq_set = &evergreen_irq_set, .irq_process = &evergreen_irq_process, .get_vblank_counter = &evergreen_get_vblank_counter, - .fence_ring_emit = &r600_fence_ring_emit, .cs_parse = &evergreen_cs_parse, .copy_blit = &r600_copy_blit, .copy_dma = NULL, @@ -888,23 +952,50 @@ static struct radeon_asic btc_asic = { .post_page_flip = &evergreen_post_page_flip, }; +static const struct radeon_vm_funcs cayman_vm_funcs = { + .init = &cayman_vm_init, + .fini = &cayman_vm_fini, + .bind = &cayman_vm_bind, + .unbind = &cayman_vm_unbind, + .tlb_flush = &cayman_vm_tlb_flush, + .page_flags = &cayman_vm_page_flags, + .set_page = &cayman_vm_set_page, +}; + static struct radeon_asic cayman_asic = { .init = &cayman_init, .fini = &cayman_fini, .suspend = &cayman_suspend, .resume = &cayman_resume, - .cp_commit = &r600_cp_commit, .gpu_is_lockup = &cayman_gpu_is_lockup, .asic_reset = &cayman_asic_reset, .vga_set_state = &r600_vga_set_state, .gart_tlb_flush = &cayman_pcie_gart_tlb_flush, .gart_set_page = &rs600_gart_set_page, .ring_test = &r600_ring_test, - .ring_ib_execute = &evergreen_ring_ib_execute, + .ring = { + [RADEON_RING_TYPE_GFX_INDEX] = { + .ib_execute = &cayman_ring_ib_execute, + .ib_parse = &evergreen_ib_parse, + .emit_fence = &cayman_fence_ring_emit, + .emit_semaphore = &r600_semaphore_ring_emit, + }, + [CAYMAN_RING_TYPE_CP1_INDEX] = { + .ib_execute = &cayman_ring_ib_execute, + .ib_parse = &evergreen_ib_parse, + .emit_fence = &cayman_fence_ring_emit, + .emit_semaphore = &r600_semaphore_ring_emit, + }, + [CAYMAN_RING_TYPE_CP2_INDEX] = { + .ib_execute = &cayman_ring_ib_execute, + .ib_parse = &evergreen_ib_parse, + .emit_fence = &cayman_fence_ring_emit, + .emit_semaphore = &r600_semaphore_ring_emit, + } + }, .irq_set = &evergreen_irq_set, .irq_process = &evergreen_irq_process, .get_vblank_counter = &evergreen_get_vblank_counter, - .fence_ring_emit = &r600_fence_ring_emit, .cs_parse = &evergreen_cs_parse, .copy_blit = &r600_copy_blit, .copy_dma = NULL, @@ -945,6 +1036,9 @@ int radeon_asic_init(struct radeon_device *rdev) else rdev->num_crtc = 2; + /* set the ring used for bo copies */ + rdev->copy_ring = RADEON_RING_TYPE_GFX_INDEX; + switch (rdev->family) { case CHIP_R100: case CHIP_RV100: @@ -1050,6 +1144,7 @@ int radeon_asic_init(struct radeon_device *rdev) rdev->asic = &cayman_asic; /* set num crtcs */ rdev->num_crtc = 6; + rdev->vm_manager.funcs = &cayman_vm_funcs; break; default: /* FIXME: not supported yet */ diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 59914842a729..6304aef0d9b2 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -58,17 +58,20 @@ void r100_fini(struct radeon_device *rdev); int r100_suspend(struct radeon_device *rdev); int r100_resume(struct radeon_device *rdev); void r100_vga_set_state(struct radeon_device *rdev, bool state); -bool r100_gpu_is_lockup(struct radeon_device *rdev); +bool r100_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *cp); int r100_asic_reset(struct radeon_device *rdev); u32 r100_get_vblank_counter(struct radeon_device *rdev, int crtc); void r100_pci_gart_tlb_flush(struct radeon_device *rdev); int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr); -void r100_cp_commit(struct radeon_device *rdev); void r100_ring_start(struct radeon_device *rdev); int r100_irq_set(struct radeon_device *rdev); int r100_irq_process(struct radeon_device *rdev); void r100_fence_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence); +void r100_semaphore_ring_emit(struct radeon_device *rdev, + struct radeon_ring *cp, + struct radeon_semaphore *semaphore, + bool emit_wait); int r100_cs_parse(struct radeon_cs_parser *p); void r100_pll_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v); uint32_t r100_pll_rreg(struct radeon_device *rdev, uint32_t reg); @@ -83,7 +86,7 @@ int r100_set_surface_reg(struct radeon_device *rdev, int reg, void r100_clear_surface_reg(struct radeon_device *rdev, int reg); void r100_bandwidth_update(struct radeon_device *rdev); void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); -int r100_ring_test(struct radeon_device *rdev); +int r100_ring_test(struct radeon_device *rdev, struct radeon_ring *cp); void r100_hpd_init(struct radeon_device *rdev); void r100_hpd_fini(struct radeon_device *rdev); bool r100_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd); @@ -101,12 +104,12 @@ void r100_pci_gart_disable(struct radeon_device *rdev); int r100_debugfs_mc_info_init(struct radeon_device *rdev); int r100_gui_wait_for_idle(struct radeon_device *rdev); void r100_gpu_lockup_update(struct r100_gpu_lockup *lockup, - struct radeon_cp *cp); + struct radeon_ring *cp); bool r100_gpu_cp_is_lockup(struct radeon_device *rdev, struct r100_gpu_lockup *lockup, - struct radeon_cp *cp); + struct radeon_ring *cp); void r100_ib_fini(struct radeon_device *rdev); -int r100_ib_init(struct radeon_device *rdev); +int r100_ib_test(struct radeon_device *rdev); void r100_irq_disable(struct radeon_device *rdev); void r100_mc_stop(struct radeon_device *rdev, struct r100_mc_save *save); void r100_mc_resume(struct radeon_device *rdev, struct r100_mc_save *save); @@ -154,7 +157,7 @@ extern int r300_init(struct radeon_device *rdev); extern void r300_fini(struct radeon_device *rdev); extern int r300_suspend(struct radeon_device *rdev); extern int r300_resume(struct radeon_device *rdev); -extern bool r300_gpu_is_lockup(struct radeon_device *rdev); +extern bool r300_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *cp); extern int r300_asic_reset(struct radeon_device *rdev); extern void r300_ring_start(struct radeon_device *rdev); extern void r300_fence_ring_emit(struct radeon_device *rdev, @@ -293,22 +296,25 @@ int r600_resume(struct radeon_device *rdev); void r600_vga_set_state(struct radeon_device *rdev, bool state); int r600_wb_init(struct radeon_device *rdev); void r600_wb_fini(struct radeon_device *rdev); -void r600_cp_commit(struct radeon_device *rdev); void r600_pcie_gart_tlb_flush(struct radeon_device *rdev); uint32_t r600_pciep_rreg(struct radeon_device *rdev, uint32_t reg); void r600_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v); int r600_cs_parse(struct radeon_cs_parser *p); void r600_fence_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence); -bool r600_gpu_is_lockup(struct radeon_device *rdev); +void r600_semaphore_ring_emit(struct radeon_device *rdev, + struct radeon_ring *cp, + struct radeon_semaphore *semaphore, + bool emit_wait); +bool r600_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *cp); int r600_asic_reset(struct radeon_device *rdev); int r600_set_surface_reg(struct radeon_device *rdev, int reg, uint32_t tiling_flags, uint32_t pitch, uint32_t offset, uint32_t obj_size); void r600_clear_surface_reg(struct radeon_device *rdev, int reg); -int r600_ib_test(struct radeon_device *rdev); +int r600_ib_test(struct radeon_device *rdev, int ring); void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); -int r600_ring_test(struct radeon_device *rdev); +int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *cp); int r600_copy_blit(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, struct radeon_fence *fence); @@ -328,7 +334,7 @@ extern int r600_get_pcie_lanes(struct radeon_device *rdev); bool r600_card_posted(struct radeon_device *rdev); void r600_cp_stop(struct radeon_device *rdev); int r600_cp_start(struct radeon_device *rdev); -void r600_ring_init(struct radeon_device *rdev, unsigned ring_size); +void r600_ring_init(struct radeon_device *rdev, struct radeon_ring *cp, unsigned ring_size); int r600_cp_resume(struct radeon_device *rdev); void r600_cp_fini(struct radeon_device *rdev); int r600_count_pipe_bits(uint32_t val); @@ -397,7 +403,7 @@ int evergreen_init(struct radeon_device *rdev); void evergreen_fini(struct radeon_device *rdev); int evergreen_suspend(struct radeon_device *rdev); int evergreen_resume(struct radeon_device *rdev); -bool evergreen_gpu_is_lockup(struct radeon_device *rdev); +bool evergreen_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *cp); int evergreen_asic_reset(struct radeon_device *rdev); void evergreen_bandwidth_update(struct radeon_device *rdev); void evergreen_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); @@ -423,12 +429,26 @@ int evergreen_blit_init(struct radeon_device *rdev); /* * cayman */ +void cayman_fence_ring_emit(struct radeon_device *rdev, + struct radeon_fence *fence); void cayman_pcie_gart_tlb_flush(struct radeon_device *rdev); int cayman_init(struct radeon_device *rdev); void cayman_fini(struct radeon_device *rdev); int cayman_suspend(struct radeon_device *rdev); int cayman_resume(struct radeon_device *rdev); -bool cayman_gpu_is_lockup(struct radeon_device *rdev); +bool cayman_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *cp); int cayman_asic_reset(struct radeon_device *rdev); +void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); +int cayman_vm_init(struct radeon_device *rdev); +void cayman_vm_fini(struct radeon_device *rdev); +int cayman_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id); +void cayman_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm); +void cayman_vm_tlb_flush(struct radeon_device *rdev, struct radeon_vm *vm); +uint32_t cayman_vm_page_flags(struct radeon_device *rdev, + struct radeon_vm *vm, + uint32_t flags); +void cayman_vm_set_page(struct radeon_device *rdev, struct radeon_vm *vm, + unsigned pfn, uint64_t addr, uint32_t flags); +int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); #endif diff --git a/drivers/gpu/drm/radeon/radeon_benchmark.c b/drivers/gpu/drm/radeon/radeon_benchmark.c index 17e1a9b2d8fb..815f2341ab94 100644 --- a/drivers/gpu/drm/radeon/radeon_benchmark.c +++ b/drivers/gpu/drm/radeon/radeon_benchmark.c @@ -43,7 +43,7 @@ static int radeon_benchmark_do_move(struct radeon_device *rdev, unsigned size, start_jiffies = jiffies; for (i = 0; i < n; i++) { - r = radeon_fence_create(rdev, &fence); + r = radeon_fence_create(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX); if (r) return r; @@ -229,21 +229,21 @@ void radeon_benchmark(struct radeon_device *rdev, int test_number) break; case 6: /* GTT to VRAM, buffer size sweep, common modes */ - for (i = 1; i < RADEON_BENCHMARK_COMMON_MODES_N; i++) + for (i = 0; i < RADEON_BENCHMARK_COMMON_MODES_N; i++) radeon_benchmark_move(rdev, common_modes[i], RADEON_GEM_DOMAIN_GTT, RADEON_GEM_DOMAIN_VRAM); break; case 7: /* VRAM to GTT, buffer size sweep, common modes */ - for (i = 1; i < RADEON_BENCHMARK_COMMON_MODES_N; i++) + for (i = 0; i < RADEON_BENCHMARK_COMMON_MODES_N; i++) radeon_benchmark_move(rdev, common_modes[i], RADEON_GEM_DOMAIN_VRAM, RADEON_GEM_DOMAIN_GTT); break; case 8: /* VRAM to VRAM, buffer size sweep, common modes */ - for (i = 1; i < RADEON_BENCHMARK_COMMON_MODES_N; i++) + for (i = 0; i < RADEON_BENCHMARK_COMMON_MODES_N; i++) radeon_benchmark_move(rdev, common_modes[i], RADEON_GEM_DOMAIN_VRAM, RADEON_GEM_DOMAIN_VRAM); diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 29afd71e0840..435a3d970ab8 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -58,7 +58,7 @@ int radeon_cs_parser_relocs(struct radeon_cs_parser *p) duplicate = false; r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4]; - for (j = 0; j < p->nrelocs; j++) { + for (j = 0; j < i; j++) { if (r->handle == p->relocs[j].handle) { p->relocs_ptr[i] = &p->relocs[j]; duplicate = true; @@ -84,16 +84,75 @@ int radeon_cs_parser_relocs(struct radeon_cs_parser *p) p->relocs[i].flags = r->flags; radeon_bo_list_add_object(&p->relocs[i].lobj, &p->validated); - } + + if (p->relocs[i].robj->tbo.sync_obj && !(r->flags & RADEON_RELOC_DONT_SYNC)) { + struct radeon_fence *fence = p->relocs[i].robj->tbo.sync_obj; + if (!radeon_fence_signaled(fence)) { + p->sync_to_ring[fence->ring] = true; + } + } + } else + p->relocs[i].handle = 0; } return radeon_bo_list_validate(&p->validated); } +static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority) +{ + p->priority = priority; + + switch (ring) { + default: + DRM_ERROR("unknown ring id: %d\n", ring); + return -EINVAL; + case RADEON_CS_RING_GFX: + p->ring = RADEON_RING_TYPE_GFX_INDEX; + break; + case RADEON_CS_RING_COMPUTE: + /* for now */ + p->ring = RADEON_RING_TYPE_GFX_INDEX; + break; + } + return 0; +} + +static int radeon_cs_sync_rings(struct radeon_cs_parser *p) +{ + int i, r; + + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + /* no need to sync to our own or unused rings */ + if (i == p->ring || !p->sync_to_ring[i] || !p->rdev->ring[i].ready) + continue; + + if (!p->ib->fence->semaphore) { + r = radeon_semaphore_create(p->rdev, &p->ib->fence->semaphore); + if (r) + return r; + } + + r = radeon_ring_lock(p->rdev, &p->rdev->ring[i], 3); + if (r) + return r; + radeon_semaphore_emit_signal(p->rdev, i, p->ib->fence->semaphore); + radeon_ring_unlock_commit(p->rdev, &p->rdev->ring[i]); + + r = radeon_ring_lock(p->rdev, &p->rdev->ring[p->ring], 3); + if (r) + return r; + radeon_semaphore_emit_wait(p->rdev, p->ring, p->ib->fence->semaphore); + radeon_ring_unlock_commit(p->rdev, &p->rdev->ring[p->ring]); + } + return 0; +} + int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) { struct drm_radeon_cs *cs = data; uint64_t *chunk_array_ptr; - unsigned size, i, flags = 0; + unsigned size, i; + u32 ring = RADEON_CS_RING_GFX; + s32 priority = 0; if (!cs->num_chunks) { return 0; @@ -103,6 +162,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) p->idx = 0; p->chunk_ib_idx = -1; p->chunk_relocs_idx = -1; + p->chunk_flags_idx = -1; p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL); if (p->chunks_array == NULL) { return -ENOMEM; @@ -112,6 +172,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) sizeof(uint64_t)*cs->num_chunks)) { return -EFAULT; } + p->cs_flags = 0; p->nchunks = cs->num_chunks; p->chunks = kcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), GFP_KERNEL); if (p->chunks == NULL) { @@ -140,16 +201,19 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) if (p->chunks[i].length_dw == 0) return -EINVAL; } - if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS && - !p->chunks[i].length_dw) { - return -EINVAL; + if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) { + p->chunk_flags_idx = i; + /* zero length flags aren't useful */ + if (p->chunks[i].length_dw == 0) + return -EINVAL; } p->chunks[i].length_dw = user_chunk.length_dw; p->chunks[i].user_ptr = (void __user *)(unsigned long)user_chunk.chunk_data; cdata = (uint32_t *)(unsigned long)user_chunk.chunk_data; - if (p->chunks[i].chunk_id != RADEON_CHUNK_ID_IB) { + if ((p->chunks[i].chunk_id == RADEON_CHUNK_ID_RELOCS) || + (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS)) { size = p->chunks[i].length_dw * sizeof(uint32_t); p->chunks[i].kdata = kmalloc(size, GFP_KERNEL); if (p->chunks[i].kdata == NULL) { @@ -160,29 +224,58 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) return -EFAULT; } if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) { - flags = p->chunks[i].kdata[0]; + p->cs_flags = p->chunks[i].kdata[0]; + if (p->chunks[i].length_dw > 1) + ring = p->chunks[i].kdata[1]; + if (p->chunks[i].length_dw > 2) + priority = (s32)p->chunks[i].kdata[2]; } - } else { - p->chunks[i].kpage[0] = kmalloc(PAGE_SIZE, GFP_KERNEL); - p->chunks[i].kpage[1] = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (p->chunks[i].kpage[0] == NULL || p->chunks[i].kpage[1] == NULL) { - kfree(p->chunks[i].kpage[0]); - kfree(p->chunks[i].kpage[1]); - return -ENOMEM; - } - p->chunks[i].kpage_idx[0] = -1; - p->chunks[i].kpage_idx[1] = -1; - p->chunks[i].last_copied_page = -1; - p->chunks[i].last_page_index = ((p->chunks[i].length_dw * 4) - 1) / PAGE_SIZE; } } - if (p->chunks[p->chunk_ib_idx].length_dw > (16 * 1024)) { - DRM_ERROR("cs IB too big: %d\n", - p->chunks[p->chunk_ib_idx].length_dw); + + if ((p->cs_flags & RADEON_CS_USE_VM) && + !p->rdev->vm_manager.enabled) { + DRM_ERROR("VM not active on asic!\n"); + if (p->chunk_relocs_idx != -1) + kfree(p->chunks[p->chunk_relocs_idx].kdata); + if (p->chunk_flags_idx != -1) + kfree(p->chunks[p->chunk_flags_idx].kdata); return -EINVAL; } - p->keep_tiling_flags = (flags & RADEON_CS_KEEP_TILING_FLAGS) != 0; + if (radeon_cs_get_ring(p, ring, priority)) { + if (p->chunk_relocs_idx != -1) + kfree(p->chunks[p->chunk_relocs_idx].kdata); + if (p->chunk_flags_idx != -1) + kfree(p->chunks[p->chunk_flags_idx].kdata); + return -EINVAL; + } + + + /* deal with non-vm */ + if ((p->chunk_ib_idx != -1) && + ((p->cs_flags & RADEON_CS_USE_VM) == 0) && + (p->chunks[p->chunk_ib_idx].chunk_id == RADEON_CHUNK_ID_IB)) { + if (p->chunks[p->chunk_ib_idx].length_dw > (16 * 1024)) { + DRM_ERROR("cs IB too big: %d\n", + p->chunks[p->chunk_ib_idx].length_dw); + return -EINVAL; + } + p->chunks[p->chunk_ib_idx].kpage[0] = kmalloc(PAGE_SIZE, GFP_KERNEL); + p->chunks[p->chunk_ib_idx].kpage[1] = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (p->chunks[p->chunk_ib_idx].kpage[0] == NULL || + p->chunks[p->chunk_ib_idx].kpage[1] == NULL) { + kfree(p->chunks[p->chunk_ib_idx].kpage[0]); + kfree(p->chunks[p->chunk_ib_idx].kpage[1]); + return -ENOMEM; + } + p->chunks[p->chunk_ib_idx].kpage_idx[0] = -1; + p->chunks[p->chunk_ib_idx].kpage_idx[1] = -1; + p->chunks[p->chunk_ib_idx].last_copied_page = -1; + p->chunks[p->chunk_ib_idx].last_page_index = + ((p->chunks[p->chunk_ib_idx].length_dw * 4) - 1) / PAGE_SIZE; + } + return 0; } @@ -224,11 +317,139 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error) radeon_ib_free(parser->rdev, &parser->ib); } +static int radeon_cs_ib_chunk(struct radeon_device *rdev, + struct radeon_cs_parser *parser) +{ + struct radeon_cs_chunk *ib_chunk; + int r; + + if (parser->chunk_ib_idx == -1) + return 0; + + if (parser->cs_flags & RADEON_CS_USE_VM) + return 0; + + ib_chunk = &parser->chunks[parser->chunk_ib_idx]; + /* Copy the packet into the IB, the parser will read from the + * input memory (cached) and write to the IB (which can be + * uncached). + */ + r = radeon_ib_get(rdev, parser->ring, &parser->ib, + ib_chunk->length_dw * 4); + if (r) { + DRM_ERROR("Failed to get ib !\n"); + return r; + } + parser->ib->length_dw = ib_chunk->length_dw; + r = radeon_cs_parse(parser); + if (r || parser->parser_error) { + DRM_ERROR("Invalid command stream !\n"); + return r; + } + r = radeon_cs_finish_pages(parser); + if (r) { + DRM_ERROR("Invalid command stream !\n"); + return r; + } + r = radeon_cs_sync_rings(parser); + if (r) { + DRM_ERROR("Failed to synchronize rings !\n"); + } + parser->ib->vm_id = 0; + r = radeon_ib_schedule(rdev, parser->ib); + if (r) { + DRM_ERROR("Failed to schedule IB !\n"); + } + return 0; +} + +static int radeon_bo_vm_update_pte(struct radeon_cs_parser *parser, + struct radeon_vm *vm) +{ + struct radeon_bo_list *lobj; + struct radeon_bo *bo; + int r; + + list_for_each_entry(lobj, &parser->validated, tv.head) { + bo = lobj->bo; + r = radeon_vm_bo_update_pte(parser->rdev, vm, bo, &bo->tbo.mem); + if (r) { + return r; + } + } + return 0; +} + +static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, + struct radeon_cs_parser *parser) +{ + struct radeon_cs_chunk *ib_chunk; + struct radeon_fpriv *fpriv = parser->filp->driver_priv; + struct radeon_vm *vm = &fpriv->vm; + int r; + + if (parser->chunk_ib_idx == -1) + return 0; + + if ((parser->cs_flags & RADEON_CS_USE_VM) == 0) + return 0; + + ib_chunk = &parser->chunks[parser->chunk_ib_idx]; + if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) { + DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw); + return -EINVAL; + } + r = radeon_ib_get(rdev, parser->ring, &parser->ib, + ib_chunk->length_dw * 4); + if (r) { + DRM_ERROR("Failed to get ib !\n"); + return r; + } + parser->ib->length_dw = ib_chunk->length_dw; + /* Copy the packet into the IB */ + if (DRM_COPY_FROM_USER(parser->ib->ptr, ib_chunk->user_ptr, + ib_chunk->length_dw * 4)) { + return -EFAULT; + } + r = radeon_ring_ib_parse(rdev, parser->ring, parser->ib); + if (r) { + return r; + } + + mutex_lock(&vm->mutex); + r = radeon_vm_bind(rdev, vm); + if (r) { + goto out; + } + r = radeon_bo_vm_update_pte(parser, vm); + if (r) { + goto out; + } + r = radeon_cs_sync_rings(parser); + if (r) { + DRM_ERROR("Failed to synchronize rings !\n"); + } + parser->ib->vm_id = vm->id; + /* ib pool is bind at 0 in virtual address space to gpu_addr is the + * offset inside the pool bo + */ + parser->ib->gpu_addr = parser->ib->sa_bo.offset; + r = radeon_ib_schedule(rdev, parser->ib); +out: + if (!r) { + if (vm->fence) { + radeon_fence_unref(&vm->fence); + } + vm->fence = radeon_fence_ref(parser->ib->fence); + } + mutex_unlock(&fpriv->vm.mutex); + return r; +} + int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { struct radeon_device *rdev = dev->dev_private; struct radeon_cs_parser parser; - struct radeon_cs_chunk *ib_chunk; int r; radeon_mutex_lock(&rdev->cs_mutex); @@ -245,13 +466,6 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) radeon_mutex_unlock(&rdev->cs_mutex); return r; } - r = radeon_ib_get(rdev, &parser.ib); - if (r) { - DRM_ERROR("Failed to get ib !\n"); - radeon_cs_parser_fini(&parser, r); - radeon_mutex_unlock(&rdev->cs_mutex); - return r; - } r = radeon_cs_parser_relocs(&parser); if (r) { if (r != -ERESTARTSYS) @@ -260,29 +474,15 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) radeon_mutex_unlock(&rdev->cs_mutex); return r; } - /* Copy the packet into the IB, the parser will read from the - * input memory (cached) and write to the IB (which can be - * uncached). */ - ib_chunk = &parser.chunks[parser.chunk_ib_idx]; - parser.ib->length_dw = ib_chunk->length_dw; - r = radeon_cs_parse(&parser); - if (r || parser.parser_error) { - DRM_ERROR("Invalid command stream !\n"); - radeon_cs_parser_fini(&parser, r); - radeon_mutex_unlock(&rdev->cs_mutex); - return r; - } - r = radeon_cs_finish_pages(&parser); + r = radeon_cs_ib_chunk(rdev, &parser); if (r) { - DRM_ERROR("Invalid command stream !\n"); - radeon_cs_parser_fini(&parser, r); - radeon_mutex_unlock(&rdev->cs_mutex); - return r; + goto out; } - r = radeon_ib_schedule(rdev, parser.ib); + r = radeon_cs_ib_vm_chunk(rdev, &parser); if (r) { - DRM_ERROR("Failed to schedule IB !\n"); + goto out; } +out: radeon_cs_parser_fini(&parser, r); radeon_mutex_unlock(&rdev->cs_mutex); return r; diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index c4d00a171411..0afb13bd8dca 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -224,8 +224,11 @@ int radeon_wb_init(struct radeon_device *rdev) if (radeon_no_wb == 1) rdev->wb.enabled = false; else { - /* often unreliable on AGP */ if (rdev->flags & RADEON_IS_AGP) { + /* often unreliable on AGP */ + rdev->wb.enabled = false; + } else if (rdev->family < CHIP_R300) { + /* often unreliable on pre-r300 */ rdev->wb.enabled = false; } else { rdev->wb.enabled = true; @@ -718,17 +721,24 @@ int radeon_device_init(struct radeon_device *rdev, * can recall function without having locking issues */ radeon_mutex_init(&rdev->cs_mutex); mutex_init(&rdev->ib_pool.mutex); - mutex_init(&rdev->cp.mutex); + for (i = 0; i < RADEON_NUM_RINGS; ++i) + mutex_init(&rdev->ring[i].mutex); mutex_init(&rdev->dc_hw_i2c_mutex); if (rdev->family >= CHIP_R600) spin_lock_init(&rdev->ih.lock); mutex_init(&rdev->gem.mutex); mutex_init(&rdev->pm.mutex); mutex_init(&rdev->vram_mutex); - rwlock_init(&rdev->fence_drv.lock); + rwlock_init(&rdev->fence_lock); + rwlock_init(&rdev->semaphore_drv.lock); INIT_LIST_HEAD(&rdev->gem.objects); init_waitqueue_head(&rdev->irq.vblank_queue); init_waitqueue_head(&rdev->irq.idle_queue); + INIT_LIST_HEAD(&rdev->semaphore_drv.bo); + /* initialize vm here */ + rdev->vm_manager.use_bitmap = 1; + rdev->vm_manager.max_pfn = 1 << 20; + INIT_LIST_HEAD(&rdev->vm_manager.lru_vm); /* Set asic functions */ r = radeon_asic_init(rdev); @@ -765,8 +775,14 @@ int radeon_device_init(struct radeon_device *rdev, r = pci_set_dma_mask(rdev->pdev, DMA_BIT_MASK(dma_bits)); if (r) { rdev->need_dma32 = true; + dma_bits = 32; printk(KERN_WARNING "radeon: No suitable DMA available.\n"); } + r = pci_set_consistent_dma_mask(rdev->pdev, DMA_BIT_MASK(dma_bits)); + if (r) { + pci_set_consistent_dma_mask(rdev->pdev, DMA_BIT_MASK(32)); + printk(KERN_WARNING "radeon: No coherent DMA available.\n"); + } /* Registers mapping */ /* TODO: block userspace mapping of io register */ @@ -814,15 +830,20 @@ int radeon_device_init(struct radeon_device *rdev, if (r) return r; } - if (radeon_testing) { + if ((radeon_testing & 1)) { radeon_test_moves(rdev); } + if ((radeon_testing & 2)) { + radeon_test_syncing(rdev); + } if (radeon_benchmarking) { radeon_benchmark(rdev, radeon_benchmarking); } return 0; } +static void radeon_debugfs_remove_files(struct radeon_device *rdev); + void radeon_device_fini(struct radeon_device *rdev) { DRM_INFO("radeon: finishing device.\n"); @@ -837,6 +858,7 @@ void radeon_device_fini(struct radeon_device *rdev) rdev->rio_mem = NULL; iounmap(rdev->rmmio); rdev->rmmio = NULL; + radeon_debugfs_remove_files(rdev); } @@ -848,7 +870,7 @@ int radeon_suspend_kms(struct drm_device *dev, pm_message_t state) struct radeon_device *rdev; struct drm_crtc *crtc; struct drm_connector *connector; - int r; + int i, r; if (dev == NULL || dev->dev_private == NULL) { return -ENODEV; @@ -887,7 +909,8 @@ int radeon_suspend_kms(struct drm_device *dev, pm_message_t state) /* evict vram memory */ radeon_bo_evict_vram(rdev); /* wait for gpu to finish processing current batch */ - radeon_fence_wait_last(rdev); + for (i = 0; i < RADEON_NUM_RINGS; i++) + radeon_fence_wait_last(rdev, i); radeon_save_bios_scratch_regs(rdev); @@ -986,36 +1009,29 @@ int radeon_gpu_reset(struct radeon_device *rdev) /* * Debugfs */ -struct radeon_debugfs { - struct drm_info_list *files; - unsigned num_files; -}; -static struct radeon_debugfs _radeon_debugfs[RADEON_DEBUGFS_MAX_COMPONENTS]; -static unsigned _radeon_debugfs_count = 0; - int radeon_debugfs_add_files(struct radeon_device *rdev, struct drm_info_list *files, unsigned nfiles) { unsigned i; - for (i = 0; i < _radeon_debugfs_count; i++) { - if (_radeon_debugfs[i].files == files) { + for (i = 0; i < rdev->debugfs_count; i++) { + if (rdev->debugfs[i].files == files) { /* Already registered */ return 0; } } - i = _radeon_debugfs_count + 1; + i = rdev->debugfs_count + 1; if (i > RADEON_DEBUGFS_MAX_COMPONENTS) { DRM_ERROR("Reached maximum number of debugfs components.\n"); DRM_ERROR("Report so we increase " "RADEON_DEBUGFS_MAX_COMPONENTS.\n"); return -EINVAL; } - _radeon_debugfs[_radeon_debugfs_count].files = files; - _radeon_debugfs[_radeon_debugfs_count].num_files = nfiles; - _radeon_debugfs_count = i; + rdev->debugfs[rdev->debugfs_count].files = files; + rdev->debugfs[rdev->debugfs_count].num_files = nfiles; + rdev->debugfs_count = i; #if defined(CONFIG_DEBUG_FS) drm_debugfs_create_files(files, nfiles, rdev->ddev->control->debugfs_root, @@ -1027,6 +1043,22 @@ int radeon_debugfs_add_files(struct radeon_device *rdev, return 0; } +static void radeon_debugfs_remove_files(struct radeon_device *rdev) +{ +#if defined(CONFIG_DEBUG_FS) + unsigned i; + + for (i = 0; i < rdev->debugfs_count; i++) { + drm_debugfs_remove_files(rdev->debugfs[i].files, + rdev->debugfs[i].num_files, + rdev->ddev->control); + drm_debugfs_remove_files(rdev->debugfs[i].files, + rdev->debugfs[i].num_files, + rdev->ddev->primary); + } +#endif +} + #if defined(CONFIG_DEBUG_FS) int radeon_debugfs_init(struct drm_minor *minor) { @@ -1035,11 +1067,5 @@ int radeon_debugfs_init(struct drm_minor *minor) void radeon_debugfs_cleanup(struct drm_minor *minor) { - unsigned i; - - for (i = 0; i < _radeon_debugfs_count; i++) { - drm_debugfs_remove_files(_radeon_debugfs[i].files, - _radeon_debugfs[i].num_files, minor); - } } #endif diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index a22d6e6a49a2..d3ffc18774a6 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -406,7 +406,7 @@ static int radeon_crtc_page_flip(struct drm_crtc *crtc, if (!ASIC_IS_AVIVO(rdev)) { /* crtc offset is from display base addr not FB location */ base -= radeon_crtc->legacy_display_base_addr; - pitch_pixels = fb->pitch / (fb->bits_per_pixel / 8); + pitch_pixels = fb->pitches[0] / (fb->bits_per_pixel / 8); if (tiling_flags & RADEON_TILING_MACRO) { if (ASIC_IS_R300(rdev)) { @@ -1081,7 +1081,7 @@ static const struct drm_framebuffer_funcs radeon_fb_funcs = { void radeon_framebuffer_init(struct drm_device *dev, struct radeon_framebuffer *rfb, - struct drm_mode_fb_cmd *mode_cmd, + struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object *obj) { rfb->obj = obj; @@ -1092,15 +1092,15 @@ radeon_framebuffer_init(struct drm_device *dev, static struct drm_framebuffer * radeon_user_framebuffer_create(struct drm_device *dev, struct drm_file *file_priv, - struct drm_mode_fb_cmd *mode_cmd) + struct drm_mode_fb_cmd2 *mode_cmd) { struct drm_gem_object *obj; struct radeon_framebuffer *radeon_fb; - obj = drm_gem_object_lookup(dev, file_priv, mode_cmd->handle); + obj = drm_gem_object_lookup(dev, file_priv, mode_cmd->handles[0]); if (obj == NULL) { dev_err(&dev->pdev->dev, "No GEM object associated to handle 0x%08X, " - "can't create framebuffer\n", mode_cmd->handle); + "can't create framebuffer\n", mode_cmd->handles[0]); return ERR_PTR(-ENOENT); } diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 71499fc3daf5..31da622eef63 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -54,9 +54,10 @@ * 2.10.0 - fusion 2D tiling * 2.11.0 - backend map, initial compute support for the CS checker * 2.12.0 - RADEON_CS_KEEP_TILING_FLAGS + * 2.13.0 - virtual memory support */ #define KMS_DRIVER_MAJOR 2 -#define KMS_DRIVER_MINOR 12 +#define KMS_DRIVER_MINOR 13 #define KMS_DRIVER_PATCHLEVEL 0 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags); int radeon_driver_unload_kms(struct drm_device *dev); @@ -84,6 +85,10 @@ int radeon_dma_ioctl_kms(struct drm_device *dev, void *data, struct drm_file *file_priv); int radeon_gem_object_init(struct drm_gem_object *obj); void radeon_gem_object_free(struct drm_gem_object *obj); +int radeon_gem_object_open(struct drm_gem_object *obj, + struct drm_file *file_priv); +void radeon_gem_object_close(struct drm_gem_object *obj, + struct drm_file *file_priv); extern int radeon_get_crtc_scanoutpos(struct drm_device *dev, int crtc, int *vpos, int *hpos); extern struct drm_ioctl_desc radeon_ioctls_kms[]; @@ -206,6 +211,21 @@ static struct pci_device_id pciidlist[] = { MODULE_DEVICE_TABLE(pci, pciidlist); #endif +static const struct file_operations radeon_driver_old_fops = { + .owner = THIS_MODULE, + .open = drm_open, + .release = drm_release, + .unlocked_ioctl = drm_ioctl, + .mmap = drm_mmap, + .poll = drm_poll, + .fasync = drm_fasync, + .read = drm_read, +#ifdef CONFIG_COMPAT + .compat_ioctl = radeon_compat_ioctl, +#endif + .llseek = noop_llseek, +}; + static struct drm_driver driver_old = { .driver_features = DRIVER_USE_AGP | DRIVER_USE_MTRR | DRIVER_PCI_DMA | DRIVER_SG | @@ -232,21 +252,7 @@ static struct drm_driver driver_old = { .reclaim_buffers = drm_core_reclaim_buffers, .ioctls = radeon_ioctls, .dma_ioctl = radeon_cp_buffers, - .fops = { - .owner = THIS_MODULE, - .open = drm_open, - .release = drm_release, - .unlocked_ioctl = drm_ioctl, - .mmap = drm_mmap, - .poll = drm_poll, - .fasync = drm_fasync, - .read = drm_read, -#ifdef CONFIG_COMPAT - .compat_ioctl = radeon_compat_ioctl, -#endif - .llseek = noop_llseek, - }, - + .fops = &radeon_driver_old_fops, .name = DRIVER_NAME, .desc = DRIVER_DESC, .date = DRIVER_DATE, @@ -304,6 +310,20 @@ radeon_pci_resume(struct pci_dev *pdev) return radeon_resume_kms(dev); } +static const struct file_operations radeon_driver_kms_fops = { + .owner = THIS_MODULE, + .open = drm_open, + .release = drm_release, + .unlocked_ioctl = drm_ioctl, + .mmap = radeon_mmap, + .poll = drm_poll, + .fasync = drm_fasync, + .read = drm_read, +#ifdef CONFIG_COMPAT + .compat_ioctl = radeon_kms_compat_ioctl, +#endif +}; + static struct drm_driver kms_driver = { .driver_features = DRIVER_USE_AGP | DRIVER_USE_MTRR | DRIVER_PCI_DMA | DRIVER_SG | @@ -335,24 +355,13 @@ static struct drm_driver kms_driver = { .ioctls = radeon_ioctls_kms, .gem_init_object = radeon_gem_object_init, .gem_free_object = radeon_gem_object_free, + .gem_open_object = radeon_gem_object_open, + .gem_close_object = radeon_gem_object_close, .dma_ioctl = radeon_dma_ioctl_kms, .dumb_create = radeon_mode_dumb_create, .dumb_map_offset = radeon_mode_dumb_mmap, .dumb_destroy = radeon_mode_dumb_destroy, - .fops = { - .owner = THIS_MODULE, - .open = drm_open, - .release = drm_release, - .unlocked_ioctl = drm_ioctl, - .mmap = radeon_mmap, - .poll = drm_poll, - .fasync = drm_fasync, - .read = drm_read, -#ifdef CONFIG_COMPAT - .compat_ioctl = radeon_kms_compat_ioctl, -#endif - }, - + .fops = &radeon_driver_kms_fops, .name = DRIVER_NAME, .desc = DRIVER_DESC, .date = DRIVER_DATE, diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c index 0b7b486c97e8..cf2bf35b56b8 100644 --- a/drivers/gpu/drm/radeon/radeon_fb.c +++ b/drivers/gpu/drm/radeon/radeon_fb.c @@ -103,7 +103,7 @@ static void radeonfb_destroy_pinned_object(struct drm_gem_object *gobj) } static int radeonfb_create_pinned_object(struct radeon_fbdev *rfbdev, - struct drm_mode_fb_cmd *mode_cmd, + struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object **gobj_p) { struct radeon_device *rdev = rfbdev->rdev; @@ -114,13 +114,17 @@ static int radeonfb_create_pinned_object(struct radeon_fbdev *rfbdev, int ret; int aligned_size, size; int height = mode_cmd->height; + u32 bpp, depth; + + drm_fb_get_bpp_depth(mode_cmd->pixel_format, &depth, &bpp); /* need to align pitch with crtc limits */ - mode_cmd->pitch = radeon_align_pitch(rdev, mode_cmd->width, mode_cmd->bpp, fb_tiled) * ((mode_cmd->bpp + 1) / 8); + mode_cmd->pitches[0] = radeon_align_pitch(rdev, mode_cmd->width, bpp, + fb_tiled) * ((bpp + 1) / 8); if (rdev->family >= CHIP_R600) height = ALIGN(mode_cmd->height, 8); - size = mode_cmd->pitch * height; + size = mode_cmd->pitches[0] * height; aligned_size = ALIGN(size, PAGE_SIZE); ret = radeon_gem_object_create(rdev, aligned_size, 0, RADEON_GEM_DOMAIN_VRAM, @@ -137,7 +141,7 @@ static int radeonfb_create_pinned_object(struct radeon_fbdev *rfbdev, tiling_flags = RADEON_TILING_MACRO; #ifdef __BIG_ENDIAN - switch (mode_cmd->bpp) { + switch (bpp) { case 32: tiling_flags |= RADEON_TILING_SWAP_32BIT; break; @@ -151,7 +155,7 @@ static int radeonfb_create_pinned_object(struct radeon_fbdev *rfbdev, if (tiling_flags) { ret = radeon_bo_set_tiling_flags(rbo, tiling_flags | RADEON_TILING_SURFACE, - mode_cmd->pitch); + mode_cmd->pitches[0]); if (ret) dev_err(rdev->dev, "FB failed to set tiling flags\n"); } @@ -187,7 +191,7 @@ static int radeonfb_create(struct radeon_fbdev *rfbdev, struct radeon_device *rdev = rfbdev->rdev; struct fb_info *info; struct drm_framebuffer *fb = NULL; - struct drm_mode_fb_cmd mode_cmd; + struct drm_mode_fb_cmd2 mode_cmd; struct drm_gem_object *gobj = NULL; struct radeon_bo *rbo = NULL; struct device *device = &rdev->pdev->dev; @@ -201,8 +205,8 @@ static int radeonfb_create(struct radeon_fbdev *rfbdev, if ((sizes->surface_bpp == 24) && ASIC_IS_AVIVO(rdev)) sizes->surface_bpp = 32; - mode_cmd.bpp = sizes->surface_bpp; - mode_cmd.depth = sizes->surface_depth; + mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp, + sizes->surface_depth); ret = radeonfb_create_pinned_object(rfbdev, &mode_cmd, &gobj); rbo = gem_to_radeon_bo(gobj); @@ -228,7 +232,7 @@ static int radeonfb_create(struct radeon_fbdev *rfbdev, strcpy(info->fix.id, "radeondrmfb"); - drm_fb_helper_fill_fix(info, fb->pitch, fb->depth); + drm_fb_helper_fill_fix(info, fb->pitches[0], fb->depth); info->flags = FBINFO_DEFAULT | FBINFO_CAN_FORCE_OUTPUT; info->fbops = &radeonfb_ops; @@ -271,7 +275,7 @@ static int radeonfb_create(struct radeon_fbdev *rfbdev, DRM_INFO("vram apper at 0x%lX\n", (unsigned long)rdev->mc.aper_base); DRM_INFO("size %lu\n", (unsigned long)radeon_bo_size(rbo)); DRM_INFO("fb depth is %d\n", fb->depth); - DRM_INFO(" pitch is %d\n", fb->pitch); + DRM_INFO(" pitch is %d\n", fb->pitches[0]); vga_switcheroo_client_fb_set(rdev->ddev->pdev, info); return 0; diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 76ec0e9ed8ae..64ea3dd9e6ff 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -40,32 +40,24 @@ #include "radeon.h" #include "radeon_trace.h" -static void radeon_fence_write(struct radeon_device *rdev, u32 seq) +static void radeon_fence_write(struct radeon_device *rdev, u32 seq, int ring) { if (rdev->wb.enabled) { - u32 scratch_index; - if (rdev->wb.use_event) - scratch_index = R600_WB_EVENT_OFFSET + rdev->fence_drv.scratch_reg - rdev->scratch.reg_base; - else - scratch_index = RADEON_WB_SCRATCH_OFFSET + rdev->fence_drv.scratch_reg - rdev->scratch.reg_base; - rdev->wb.wb[scratch_index/4] = cpu_to_le32(seq); - } else - WREG32(rdev->fence_drv.scratch_reg, seq); + *rdev->fence_drv[ring].cpu_addr = cpu_to_le32(seq); + } else { + WREG32(rdev->fence_drv[ring].scratch_reg, seq); + } } -static u32 radeon_fence_read(struct radeon_device *rdev) +static u32 radeon_fence_read(struct radeon_device *rdev, int ring) { - u32 seq; + u32 seq = 0; if (rdev->wb.enabled) { - u32 scratch_index; - if (rdev->wb.use_event) - scratch_index = R600_WB_EVENT_OFFSET + rdev->fence_drv.scratch_reg - rdev->scratch.reg_base; - else - scratch_index = RADEON_WB_SCRATCH_OFFSET + rdev->fence_drv.scratch_reg - rdev->scratch.reg_base; - seq = le32_to_cpu(rdev->wb.wb[scratch_index/4]); - } else - seq = RREG32(rdev->fence_drv.scratch_reg); + seq = le32_to_cpu(*rdev->fence_drv[ring].cpu_addr); + } else { + seq = RREG32(rdev->fence_drv[ring].scratch_reg); + } return seq; } @@ -73,28 +65,28 @@ int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence) { unsigned long irq_flags; - write_lock_irqsave(&rdev->fence_drv.lock, irq_flags); - if (fence->emited) { - write_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags); + write_lock_irqsave(&rdev->fence_lock, irq_flags); + if (fence->emitted) { + write_unlock_irqrestore(&rdev->fence_lock, irq_flags); return 0; } - fence->seq = atomic_add_return(1, &rdev->fence_drv.seq); - if (!rdev->cp.ready) + fence->seq = atomic_add_return(1, &rdev->fence_drv[fence->ring].seq); + if (!rdev->ring[fence->ring].ready) /* FIXME: cp is not running assume everythings is done right * away */ - radeon_fence_write(rdev, fence->seq); + radeon_fence_write(rdev, fence->seq, fence->ring); else - radeon_fence_ring_emit(rdev, fence); + radeon_fence_ring_emit(rdev, fence->ring, fence); trace_radeon_fence_emit(rdev->ddev, fence->seq); - fence->emited = true; - list_move_tail(&fence->list, &rdev->fence_drv.emited); - write_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags); + fence->emitted = true; + list_move_tail(&fence->list, &rdev->fence_drv[fence->ring].emitted); + write_unlock_irqrestore(&rdev->fence_lock, irq_flags); return 0; } -static bool radeon_fence_poll_locked(struct radeon_device *rdev) +static bool radeon_fence_poll_locked(struct radeon_device *rdev, int ring) { struct radeon_fence *fence; struct list_head *i, *n; @@ -102,34 +94,34 @@ static bool radeon_fence_poll_locked(struct radeon_device *rdev) bool wake = false; unsigned long cjiffies; - seq = radeon_fence_read(rdev); - if (seq != rdev->fence_drv.last_seq) { - rdev->fence_drv.last_seq = seq; - rdev->fence_drv.last_jiffies = jiffies; - rdev->fence_drv.last_timeout = RADEON_FENCE_JIFFIES_TIMEOUT; + seq = radeon_fence_read(rdev, ring); + if (seq != rdev->fence_drv[ring].last_seq) { + rdev->fence_drv[ring].last_seq = seq; + rdev->fence_drv[ring].last_jiffies = jiffies; + rdev->fence_drv[ring].last_timeout = RADEON_FENCE_JIFFIES_TIMEOUT; } else { cjiffies = jiffies; - if (time_after(cjiffies, rdev->fence_drv.last_jiffies)) { - cjiffies -= rdev->fence_drv.last_jiffies; - if (time_after(rdev->fence_drv.last_timeout, cjiffies)) { + if (time_after(cjiffies, rdev->fence_drv[ring].last_jiffies)) { + cjiffies -= rdev->fence_drv[ring].last_jiffies; + if (time_after(rdev->fence_drv[ring].last_timeout, cjiffies)) { /* update the timeout */ - rdev->fence_drv.last_timeout -= cjiffies; + rdev->fence_drv[ring].last_timeout -= cjiffies; } else { /* the 500ms timeout is elapsed we should test * for GPU lockup */ - rdev->fence_drv.last_timeout = 1; + rdev->fence_drv[ring].last_timeout = 1; } } else { /* wrap around update last jiffies, we will just wait * a little longer */ - rdev->fence_drv.last_jiffies = cjiffies; + rdev->fence_drv[ring].last_jiffies = cjiffies; } return false; } n = NULL; - list_for_each(i, &rdev->fence_drv.emited) { + list_for_each(i, &rdev->fence_drv[ring].emitted) { fence = list_entry(i, struct radeon_fence, list); if (fence->seq == seq) { n = i; @@ -141,11 +133,11 @@ static bool radeon_fence_poll_locked(struct radeon_device *rdev) i = n; do { n = i->prev; - list_move_tail(i, &rdev->fence_drv.signaled); + list_move_tail(i, &rdev->fence_drv[ring].signaled); fence = list_entry(i, struct radeon_fence, list); fence->signaled = true; i = n; - } while (i != &rdev->fence_drv.emited); + } while (i != &rdev->fence_drv[ring].emitted); wake = true; } return wake; @@ -157,14 +149,18 @@ static void radeon_fence_destroy(struct kref *kref) struct radeon_fence *fence; fence = container_of(kref, struct radeon_fence, kref); - write_lock_irqsave(&fence->rdev->fence_drv.lock, irq_flags); + write_lock_irqsave(&fence->rdev->fence_lock, irq_flags); list_del(&fence->list); - fence->emited = false; - write_unlock_irqrestore(&fence->rdev->fence_drv.lock, irq_flags); + fence->emitted = false; + write_unlock_irqrestore(&fence->rdev->fence_lock, irq_flags); + if (fence->semaphore) + radeon_semaphore_free(fence->rdev, fence->semaphore); kfree(fence); } -int radeon_fence_create(struct radeon_device *rdev, struct radeon_fence **fence) +int radeon_fence_create(struct radeon_device *rdev, + struct radeon_fence **fence, + int ring) { unsigned long irq_flags; @@ -174,18 +170,19 @@ int radeon_fence_create(struct radeon_device *rdev, struct radeon_fence **fence) } kref_init(&((*fence)->kref)); (*fence)->rdev = rdev; - (*fence)->emited = false; + (*fence)->emitted = false; (*fence)->signaled = false; (*fence)->seq = 0; + (*fence)->ring = ring; + (*fence)->semaphore = NULL; INIT_LIST_HEAD(&(*fence)->list); - write_lock_irqsave(&rdev->fence_drv.lock, irq_flags); - list_add_tail(&(*fence)->list, &rdev->fence_drv.created); - write_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags); + write_lock_irqsave(&rdev->fence_lock, irq_flags); + list_add_tail(&(*fence)->list, &rdev->fence_drv[ring].created); + write_unlock_irqrestore(&rdev->fence_lock, irq_flags); return 0; } - bool radeon_fence_signaled(struct radeon_fence *fence) { unsigned long irq_flags; @@ -197,21 +194,21 @@ bool radeon_fence_signaled(struct radeon_fence *fence) if (fence->rdev->gpu_lockup) return true; - write_lock_irqsave(&fence->rdev->fence_drv.lock, irq_flags); + write_lock_irqsave(&fence->rdev->fence_lock, irq_flags); signaled = fence->signaled; /* if we are shuting down report all fence as signaled */ if (fence->rdev->shutdown) { signaled = true; } - if (!fence->emited) { - WARN(1, "Querying an unemited fence : %p !\n", fence); + if (!fence->emitted) { + WARN(1, "Querying an unemitted fence : %p !\n", fence); signaled = true; } if (!signaled) { - radeon_fence_poll_locked(fence->rdev); + radeon_fence_poll_locked(fence->rdev, fence->ring); signaled = fence->signaled; } - write_unlock_irqrestore(&fence->rdev->fence_drv.lock, irq_flags); + write_unlock_irqrestore(&fence->rdev->fence_lock, irq_flags); return signaled; } @@ -230,24 +227,24 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr) if (radeon_fence_signaled(fence)) { return 0; } - timeout = rdev->fence_drv.last_timeout; + timeout = rdev->fence_drv[fence->ring].last_timeout; retry: /* save current sequence used to check for GPU lockup */ - seq = rdev->fence_drv.last_seq; + seq = rdev->fence_drv[fence->ring].last_seq; trace_radeon_fence_wait_begin(rdev->ddev, seq); if (intr) { - radeon_irq_kms_sw_irq_get(rdev); - r = wait_event_interruptible_timeout(rdev->fence_drv.queue, + radeon_irq_kms_sw_irq_get(rdev, fence->ring); + r = wait_event_interruptible_timeout(rdev->fence_drv[fence->ring].queue, radeon_fence_signaled(fence), timeout); - radeon_irq_kms_sw_irq_put(rdev); + radeon_irq_kms_sw_irq_put(rdev, fence->ring); if (unlikely(r < 0)) { return r; } } else { - radeon_irq_kms_sw_irq_get(rdev); - r = wait_event_timeout(rdev->fence_drv.queue, + radeon_irq_kms_sw_irq_get(rdev, fence->ring); + r = wait_event_timeout(rdev->fence_drv[fence->ring].queue, radeon_fence_signaled(fence), timeout); - radeon_irq_kms_sw_irq_put(rdev); + radeon_irq_kms_sw_irq_put(rdev, fence->ring); } trace_radeon_fence_wait_end(rdev->ddev, seq); if (unlikely(!radeon_fence_signaled(fence))) { @@ -258,10 +255,11 @@ retry: timeout = r; goto retry; } - /* don't protect read access to rdev->fence_drv.last_seq + /* don't protect read access to rdev->fence_drv[t].last_seq * if we experiencing a lockup the value doesn't change */ - if (seq == rdev->fence_drv.last_seq && radeon_gpu_is_lockup(rdev)) { + if (seq == rdev->fence_drv[fence->ring].last_seq && + radeon_gpu_is_lockup(rdev, &rdev->ring[fence->ring])) { /* good news we believe it's a lockup */ printk(KERN_WARNING "GPU lockup (waiting for 0x%08X last fence id 0x%08X)\n", fence->seq, seq); @@ -272,20 +270,20 @@ retry: r = radeon_gpu_reset(rdev); if (r) return r; - radeon_fence_write(rdev, fence->seq); + radeon_fence_write(rdev, fence->seq, fence->ring); rdev->gpu_lockup = false; } timeout = RADEON_FENCE_JIFFIES_TIMEOUT; - write_lock_irqsave(&rdev->fence_drv.lock, irq_flags); - rdev->fence_drv.last_timeout = RADEON_FENCE_JIFFIES_TIMEOUT; - rdev->fence_drv.last_jiffies = jiffies; - write_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags); + write_lock_irqsave(&rdev->fence_lock, irq_flags); + rdev->fence_drv[fence->ring].last_timeout = RADEON_FENCE_JIFFIES_TIMEOUT; + rdev->fence_drv[fence->ring].last_jiffies = jiffies; + write_unlock_irqrestore(&rdev->fence_lock, irq_flags); goto retry; } return 0; } -int radeon_fence_wait_next(struct radeon_device *rdev) +int radeon_fence_wait_next(struct radeon_device *rdev, int ring) { unsigned long irq_flags; struct radeon_fence *fence; @@ -294,21 +292,21 @@ int radeon_fence_wait_next(struct radeon_device *rdev) if (rdev->gpu_lockup) { return 0; } - write_lock_irqsave(&rdev->fence_drv.lock, irq_flags); - if (list_empty(&rdev->fence_drv.emited)) { - write_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags); + write_lock_irqsave(&rdev->fence_lock, irq_flags); + if (list_empty(&rdev->fence_drv[ring].emitted)) { + write_unlock_irqrestore(&rdev->fence_lock, irq_flags); return 0; } - fence = list_entry(rdev->fence_drv.emited.next, + fence = list_entry(rdev->fence_drv[ring].emitted.next, struct radeon_fence, list); radeon_fence_ref(fence); - write_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags); + write_unlock_irqrestore(&rdev->fence_lock, irq_flags); r = radeon_fence_wait(fence, false); radeon_fence_unref(&fence); return r; } -int radeon_fence_wait_last(struct radeon_device *rdev) +int radeon_fence_wait_last(struct radeon_device *rdev, int ring) { unsigned long irq_flags; struct radeon_fence *fence; @@ -317,15 +315,15 @@ int radeon_fence_wait_last(struct radeon_device *rdev) if (rdev->gpu_lockup) { return 0; } - write_lock_irqsave(&rdev->fence_drv.lock, irq_flags); - if (list_empty(&rdev->fence_drv.emited)) { - write_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags); + write_lock_irqsave(&rdev->fence_lock, irq_flags); + if (list_empty(&rdev->fence_drv[ring].emitted)) { + write_unlock_irqrestore(&rdev->fence_lock, irq_flags); return 0; } - fence = list_entry(rdev->fence_drv.emited.prev, + fence = list_entry(rdev->fence_drv[ring].emitted.prev, struct radeon_fence, list); radeon_fence_ref(fence); - write_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags); + write_unlock_irqrestore(&rdev->fence_lock, irq_flags); r = radeon_fence_wait(fence, false); radeon_fence_unref(&fence); return r; @@ -347,39 +345,95 @@ void radeon_fence_unref(struct radeon_fence **fence) } } -void radeon_fence_process(struct radeon_device *rdev) +void radeon_fence_process(struct radeon_device *rdev, int ring) { unsigned long irq_flags; bool wake; - write_lock_irqsave(&rdev->fence_drv.lock, irq_flags); - wake = radeon_fence_poll_locked(rdev); - write_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags); + write_lock_irqsave(&rdev->fence_lock, irq_flags); + wake = radeon_fence_poll_locked(rdev, ring); + write_unlock_irqrestore(&rdev->fence_lock, irq_flags); if (wake) { - wake_up_all(&rdev->fence_drv.queue); + wake_up_all(&rdev->fence_drv[ring].queue); } } -int radeon_fence_driver_init(struct radeon_device *rdev) +int radeon_fence_count_emitted(struct radeon_device *rdev, int ring) +{ + unsigned long irq_flags; + int not_processed = 0; + + read_lock_irqsave(&rdev->fence_lock, irq_flags); + if (!rdev->fence_drv[ring].initialized) + return 0; + + if (!list_empty(&rdev->fence_drv[ring].emitted)) { + struct list_head *ptr; + list_for_each(ptr, &rdev->fence_drv[ring].emitted) { + /* count up to 3, that's enought info */ + if (++not_processed >= 3) + break; + } + } + read_unlock_irqrestore(&rdev->fence_lock, irq_flags); + return not_processed; +} + +int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring) { unsigned long irq_flags; + uint64_t index; int r; - write_lock_irqsave(&rdev->fence_drv.lock, irq_flags); - r = radeon_scratch_get(rdev, &rdev->fence_drv.scratch_reg); - if (r) { - dev_err(rdev->dev, "fence failed to get scratch register\n"); - write_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags); - return r; + write_lock_irqsave(&rdev->fence_lock, irq_flags); + radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg); + if (rdev->wb.use_event) { + rdev->fence_drv[ring].scratch_reg = 0; + index = R600_WB_EVENT_OFFSET + ring * 4; + } else { + r = radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg); + if (r) { + dev_err(rdev->dev, "fence failed to get scratch register\n"); + write_unlock_irqrestore(&rdev->fence_lock, irq_flags); + return r; + } + index = RADEON_WB_SCRATCH_OFFSET + + rdev->fence_drv[ring].scratch_reg - + rdev->scratch.reg_base; } - radeon_fence_write(rdev, 0); - atomic_set(&rdev->fence_drv.seq, 0); - INIT_LIST_HEAD(&rdev->fence_drv.created); - INIT_LIST_HEAD(&rdev->fence_drv.emited); - INIT_LIST_HEAD(&rdev->fence_drv.signaled); - init_waitqueue_head(&rdev->fence_drv.queue); - rdev->fence_drv.initialized = true; - write_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags); + rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4]; + rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index; + radeon_fence_write(rdev, atomic_read(&rdev->fence_drv[ring].seq), ring); + rdev->fence_drv[ring].initialized = true; + DRM_INFO("fence driver on ring %d use gpu addr 0x%08Lx and cpu addr 0x%p\n", + ring, rdev->fence_drv[ring].gpu_addr, rdev->fence_drv[ring].cpu_addr); + write_unlock_irqrestore(&rdev->fence_lock, irq_flags); + return 0; +} + +static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring) +{ + rdev->fence_drv[ring].scratch_reg = -1; + rdev->fence_drv[ring].cpu_addr = NULL; + rdev->fence_drv[ring].gpu_addr = 0; + atomic_set(&rdev->fence_drv[ring].seq, 0); + INIT_LIST_HEAD(&rdev->fence_drv[ring].created); + INIT_LIST_HEAD(&rdev->fence_drv[ring].emitted); + INIT_LIST_HEAD(&rdev->fence_drv[ring].signaled); + init_waitqueue_head(&rdev->fence_drv[ring].queue); + rdev->fence_drv[ring].initialized = false; +} + +int radeon_fence_driver_init(struct radeon_device *rdev) +{ + unsigned long irq_flags; + int ring; + + write_lock_irqsave(&rdev->fence_lock, irq_flags); + for (ring = 0; ring < RADEON_NUM_RINGS; ring++) { + radeon_fence_driver_init_ring(rdev, ring); + } + write_unlock_irqrestore(&rdev->fence_lock, irq_flags); if (radeon_debugfs_fence_init(rdev)) { dev_err(rdev->dev, "fence debugfs file creation failed\n"); } @@ -389,14 +443,18 @@ int radeon_fence_driver_init(struct radeon_device *rdev) void radeon_fence_driver_fini(struct radeon_device *rdev) { unsigned long irq_flags; - - if (!rdev->fence_drv.initialized) - return; - wake_up_all(&rdev->fence_drv.queue); - write_lock_irqsave(&rdev->fence_drv.lock, irq_flags); - radeon_scratch_free(rdev, rdev->fence_drv.scratch_reg); - write_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags); - rdev->fence_drv.initialized = false; + int ring; + + for (ring = 0; ring < RADEON_NUM_RINGS; ring++) { + if (!rdev->fence_drv[ring].initialized) + continue; + radeon_fence_wait_last(rdev, ring); + wake_up_all(&rdev->fence_drv[ring].queue); + write_lock_irqsave(&rdev->fence_lock, irq_flags); + radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg); + write_unlock_irqrestore(&rdev->fence_lock, irq_flags); + rdev->fence_drv[ring].initialized = false; + } } @@ -410,14 +468,21 @@ static int radeon_debugfs_fence_info(struct seq_file *m, void *data) struct drm_device *dev = node->minor->dev; struct radeon_device *rdev = dev->dev_private; struct radeon_fence *fence; - - seq_printf(m, "Last signaled fence 0x%08X\n", - radeon_fence_read(rdev)); - if (!list_empty(&rdev->fence_drv.emited)) { - fence = list_entry(rdev->fence_drv.emited.prev, - struct radeon_fence, list); - seq_printf(m, "Last emited fence %p with 0x%08X\n", - fence, fence->seq); + int i; + + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + if (!rdev->fence_drv[i].initialized) + continue; + + seq_printf(m, "--- ring %d ---\n", i); + seq_printf(m, "Last signaled fence 0x%08X\n", + radeon_fence_read(rdev, i)); + if (!list_empty(&rdev->fence_drv[i].emitted)) { + fence = list_entry(rdev->fence_drv[i].emitted.prev, + struct radeon_fence, list); + seq_printf(m, "Last emitted fence %p with 0x%08X\n", + fence, fence->seq); + } } return 0; } diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index ba7ab79e12c1..010dad8b66ae 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -157,9 +157,6 @@ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset, p = t / (PAGE_SIZE / RADEON_GPU_PAGE_SIZE); for (i = 0; i < pages; i++, p++) { if (rdev->gart.pages[p]) { - if (!rdev->gart.ttm_alloced[p]) - pci_unmap_page(rdev->pdev, rdev->gart.pages_addr[p], - PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); rdev->gart.pages[p] = NULL; rdev->gart.pages_addr[p] = rdev->dummy_page.addr; page_base = rdev->gart.pages_addr[p]; @@ -191,23 +188,7 @@ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset, p = t / (PAGE_SIZE / RADEON_GPU_PAGE_SIZE); for (i = 0; i < pages; i++, p++) { - /* we reverted the patch using dma_addr in TTM for now but this - * code stops building on alpha so just comment it out for now */ - if (0) { /*dma_addr[i] != DMA_ERROR_CODE) */ - rdev->gart.ttm_alloced[p] = true; - rdev->gart.pages_addr[p] = dma_addr[i]; - } else { - /* we need to support large memory configurations */ - /* assume that unbind have already been call on the range */ - rdev->gart.pages_addr[p] = pci_map_page(rdev->pdev, pagelist[i], - 0, PAGE_SIZE, - PCI_DMA_BIDIRECTIONAL); - if (pci_dma_mapping_error(rdev->pdev, rdev->gart.pages_addr[p])) { - /* FIXME: failed to map page (return -ENOMEM?) */ - radeon_gart_unbind(rdev, offset, pages); - return -ENOMEM; - } - } + rdev->gart.pages_addr[p] = dma_addr[i]; rdev->gart.pages[p] = pagelist[i]; if (rdev->gart.ptr) { page_base = rdev->gart.pages_addr[p]; @@ -274,12 +255,6 @@ int radeon_gart_init(struct radeon_device *rdev) radeon_gart_fini(rdev); return -ENOMEM; } - rdev->gart.ttm_alloced = kzalloc(sizeof(bool) * - rdev->gart.num_cpu_pages, GFP_KERNEL); - if (rdev->gart.ttm_alloced == NULL) { - radeon_gart_fini(rdev); - return -ENOMEM; - } /* set GART entry to point to the dummy page by default */ for (i = 0; i < rdev->gart.num_cpu_pages; i++) { rdev->gart.pages_addr[i] = rdev->dummy_page.addr; @@ -296,10 +271,404 @@ void radeon_gart_fini(struct radeon_device *rdev) rdev->gart.ready = false; kfree(rdev->gart.pages); kfree(rdev->gart.pages_addr); - kfree(rdev->gart.ttm_alloced); rdev->gart.pages = NULL; rdev->gart.pages_addr = NULL; - rdev->gart.ttm_alloced = NULL; radeon_dummy_page_fini(rdev); } + +/* + * vm helpers + * + * TODO bind a default page at vm initialization for default address + */ +int radeon_vm_manager_init(struct radeon_device *rdev) +{ + int r; + + rdev->vm_manager.enabled = false; + + /* mark first vm as always in use, it's the system one */ + r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager, + rdev->vm_manager.max_pfn * 8, + RADEON_GEM_DOMAIN_VRAM); + if (r) { + dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n", + (rdev->vm_manager.max_pfn * 8) >> 10); + return r; + } + + r = rdev->vm_manager.funcs->init(rdev); + if (r == 0) + rdev->vm_manager.enabled = true; + + return r; +} + +/* cs mutex must be lock */ +static void radeon_vm_unbind_locked(struct radeon_device *rdev, + struct radeon_vm *vm) +{ + struct radeon_bo_va *bo_va; + + if (vm->id == -1) { + return; + } + + /* wait for vm use to end */ + if (vm->fence) { + radeon_fence_wait(vm->fence, false); + radeon_fence_unref(&vm->fence); + } + + /* hw unbind */ + rdev->vm_manager.funcs->unbind(rdev, vm); + rdev->vm_manager.use_bitmap &= ~(1 << vm->id); + list_del_init(&vm->list); + vm->id = -1; + radeon_sa_bo_free(rdev, &vm->sa_bo); + vm->pt = NULL; + + list_for_each_entry(bo_va, &vm->va, vm_list) { + bo_va->valid = false; + } +} + +void radeon_vm_manager_fini(struct radeon_device *rdev) +{ + if (rdev->vm_manager.sa_manager.bo == NULL) + return; + radeon_vm_manager_suspend(rdev); + rdev->vm_manager.funcs->fini(rdev); + radeon_sa_bo_manager_fini(rdev, &rdev->vm_manager.sa_manager); + rdev->vm_manager.enabled = false; +} + +int radeon_vm_manager_start(struct radeon_device *rdev) +{ + if (rdev->vm_manager.sa_manager.bo == NULL) { + return -EINVAL; + } + return radeon_sa_bo_manager_start(rdev, &rdev->vm_manager.sa_manager); +} + +int radeon_vm_manager_suspend(struct radeon_device *rdev) +{ + struct radeon_vm *vm, *tmp; + + radeon_mutex_lock(&rdev->cs_mutex); + /* unbind all active vm */ + list_for_each_entry_safe(vm, tmp, &rdev->vm_manager.lru_vm, list) { + radeon_vm_unbind_locked(rdev, vm); + } + rdev->vm_manager.funcs->fini(rdev); + radeon_mutex_unlock(&rdev->cs_mutex); + return radeon_sa_bo_manager_suspend(rdev, &rdev->vm_manager.sa_manager); +} + +/* cs mutex must be lock */ +void radeon_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm) +{ + mutex_lock(&vm->mutex); + radeon_vm_unbind_locked(rdev, vm); + mutex_unlock(&vm->mutex); +} + +/* cs mutex must be lock & vm mutex must be lock */ +int radeon_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm) +{ + struct radeon_vm *vm_evict; + unsigned i; + int id = -1, r; + + if (vm == NULL) { + return -EINVAL; + } + + if (vm->id != -1) { + /* update lru */ + list_del_init(&vm->list); + list_add_tail(&vm->list, &rdev->vm_manager.lru_vm); + return 0; + } + +retry: + r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, &vm->sa_bo, + RADEON_GPU_PAGE_ALIGN(vm->last_pfn * 8), + RADEON_GPU_PAGE_SIZE); + if (r) { + if (list_empty(&rdev->vm_manager.lru_vm)) { + return r; + } + vm_evict = list_first_entry(&rdev->vm_manager.lru_vm, struct radeon_vm, list); + radeon_vm_unbind(rdev, vm_evict); + goto retry; + } + vm->pt = rdev->vm_manager.sa_manager.cpu_ptr; + vm->pt += (vm->sa_bo.offset >> 3); + vm->pt_gpu_addr = rdev->vm_manager.sa_manager.gpu_addr; + vm->pt_gpu_addr += vm->sa_bo.offset; + memset(vm->pt, 0, RADEON_GPU_PAGE_ALIGN(vm->last_pfn * 8)); + +retry_id: + /* search for free vm */ + for (i = 0; i < rdev->vm_manager.nvm; i++) { + if (!(rdev->vm_manager.use_bitmap & (1 << i))) { + id = i; + break; + } + } + /* evict vm if necessary */ + if (id == -1) { + vm_evict = list_first_entry(&rdev->vm_manager.lru_vm, struct radeon_vm, list); + radeon_vm_unbind(rdev, vm_evict); + goto retry_id; + } + + /* do hw bind */ + r = rdev->vm_manager.funcs->bind(rdev, vm, id); + if (r) { + radeon_sa_bo_free(rdev, &vm->sa_bo); + return r; + } + rdev->vm_manager.use_bitmap |= 1 << id; + vm->id = id; + list_add_tail(&vm->list, &rdev->vm_manager.lru_vm); + return radeon_vm_bo_update_pte(rdev, vm, rdev->ib_pool.sa_manager.bo, + &rdev->ib_pool.sa_manager.bo->tbo.mem); +} + +/* object have to be reserved */ +int radeon_vm_bo_add(struct radeon_device *rdev, + struct radeon_vm *vm, + struct radeon_bo *bo, + uint64_t offset, + uint32_t flags) +{ + struct radeon_bo_va *bo_va, *tmp; + struct list_head *head; + uint64_t size = radeon_bo_size(bo), last_offset = 0; + unsigned last_pfn; + + bo_va = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); + if (bo_va == NULL) { + return -ENOMEM; + } + bo_va->vm = vm; + bo_va->bo = bo; + bo_va->soffset = offset; + bo_va->eoffset = offset + size; + bo_va->flags = flags; + bo_va->valid = false; + INIT_LIST_HEAD(&bo_va->bo_list); + INIT_LIST_HEAD(&bo_va->vm_list); + /* make sure object fit at this offset */ + if (bo_va->soffset >= bo_va->eoffset) { + kfree(bo_va); + return -EINVAL; + } + + last_pfn = bo_va->eoffset / RADEON_GPU_PAGE_SIZE; + if (last_pfn > rdev->vm_manager.max_pfn) { + kfree(bo_va); + dev_err(rdev->dev, "va above limit (0x%08X > 0x%08X)\n", + last_pfn, rdev->vm_manager.max_pfn); + return -EINVAL; + } + + mutex_lock(&vm->mutex); + if (last_pfn > vm->last_pfn) { + /* grow va space 32M by 32M */ + unsigned align = ((32 << 20) >> 12) - 1; + radeon_mutex_lock(&rdev->cs_mutex); + radeon_vm_unbind_locked(rdev, vm); + radeon_mutex_unlock(&rdev->cs_mutex); + vm->last_pfn = (last_pfn + align) & ~align; + } + head = &vm->va; + last_offset = 0; + list_for_each_entry(tmp, &vm->va, vm_list) { + if (bo_va->soffset >= last_offset && bo_va->eoffset < tmp->soffset) { + /* bo can be added before this one */ + break; + } + if (bo_va->soffset >= tmp->soffset && bo_va->soffset < tmp->eoffset) { + /* bo and tmp overlap, invalid offset */ + dev_err(rdev->dev, "bo %p va 0x%08X conflict with (bo %p 0x%08X 0x%08X)\n", + bo, (unsigned)bo_va->soffset, tmp->bo, + (unsigned)tmp->soffset, (unsigned)tmp->eoffset); + kfree(bo_va); + mutex_unlock(&vm->mutex); + return -EINVAL; + } + last_offset = tmp->eoffset; + head = &tmp->vm_list; + } + list_add(&bo_va->vm_list, head); + list_add_tail(&bo_va->bo_list, &bo->va); + mutex_unlock(&vm->mutex); + return 0; +} + +static u64 radeon_vm_get_addr(struct radeon_device *rdev, + struct ttm_mem_reg *mem, + unsigned pfn) +{ + u64 addr = 0; + + switch (mem->mem_type) { + case TTM_PL_VRAM: + addr = (mem->start << PAGE_SHIFT); + addr += pfn * RADEON_GPU_PAGE_SIZE; + addr += rdev->vm_manager.vram_base_offset; + break; + case TTM_PL_TT: + /* offset inside page table */ + addr = mem->start << PAGE_SHIFT; + addr += pfn * RADEON_GPU_PAGE_SIZE; + addr = addr >> PAGE_SHIFT; + /* page table offset */ + addr = rdev->gart.pages_addr[addr]; + /* in case cpu page size != gpu page size*/ + addr += (pfn * RADEON_GPU_PAGE_SIZE) & (~PAGE_MASK); + break; + default: + break; + } + return addr; +} + +/* object have to be reserved & cs mutex took & vm mutex took */ +int radeon_vm_bo_update_pte(struct radeon_device *rdev, + struct radeon_vm *vm, + struct radeon_bo *bo, + struct ttm_mem_reg *mem) +{ + struct radeon_bo_va *bo_va; + unsigned ngpu_pages, i; + uint64_t addr = 0, pfn; + uint32_t flags; + + /* nothing to do if vm isn't bound */ + if (vm->id == -1) + return 0;; + + bo_va = radeon_bo_va(bo, vm); + if (bo_va == NULL) { + dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm); + return -EINVAL; + } + + if (bo_va->valid) + return 0; + + ngpu_pages = radeon_bo_ngpu_pages(bo); + bo_va->flags &= ~RADEON_VM_PAGE_VALID; + bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM; + if (mem) { + if (mem->mem_type != TTM_PL_SYSTEM) { + bo_va->flags |= RADEON_VM_PAGE_VALID; + bo_va->valid = true; + } + if (mem->mem_type == TTM_PL_TT) { + bo_va->flags |= RADEON_VM_PAGE_SYSTEM; + } + } + pfn = bo_va->soffset / RADEON_GPU_PAGE_SIZE; + flags = rdev->vm_manager.funcs->page_flags(rdev, bo_va->vm, bo_va->flags); + for (i = 0, addr = 0; i < ngpu_pages; i++) { + if (mem && bo_va->valid) { + addr = radeon_vm_get_addr(rdev, mem, i); + } + rdev->vm_manager.funcs->set_page(rdev, bo_va->vm, i + pfn, addr, flags); + } + rdev->vm_manager.funcs->tlb_flush(rdev, bo_va->vm); + return 0; +} + +/* object have to be reserved */ +int radeon_vm_bo_rmv(struct radeon_device *rdev, + struct radeon_vm *vm, + struct radeon_bo *bo) +{ + struct radeon_bo_va *bo_va; + + bo_va = radeon_bo_va(bo, vm); + if (bo_va == NULL) + return 0; + + list_del(&bo_va->bo_list); + mutex_lock(&vm->mutex); + radeon_mutex_lock(&rdev->cs_mutex); + radeon_vm_bo_update_pte(rdev, vm, bo, NULL); + radeon_mutex_unlock(&rdev->cs_mutex); + list_del(&bo_va->vm_list); + mutex_unlock(&vm->mutex); + + kfree(bo_va); + return 0; +} + +void radeon_vm_bo_invalidate(struct radeon_device *rdev, + struct radeon_bo *bo) +{ + struct radeon_bo_va *bo_va; + + BUG_ON(!atomic_read(&bo->tbo.reserved)); + list_for_each_entry(bo_va, &bo->va, bo_list) { + bo_va->valid = false; + } +} + +int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) +{ + int r; + + vm->id = -1; + vm->fence = NULL; + mutex_init(&vm->mutex); + INIT_LIST_HEAD(&vm->list); + INIT_LIST_HEAD(&vm->va); + vm->last_pfn = 0; + /* map the ib pool buffer at 0 in virtual address space, set + * read only + */ + r = radeon_vm_bo_add(rdev, vm, rdev->ib_pool.sa_manager.bo, 0, + RADEON_VM_PAGE_READABLE | RADEON_VM_PAGE_SNOOPED); + return r; +} + +void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) +{ + struct radeon_bo_va *bo_va, *tmp; + int r; + + mutex_lock(&vm->mutex); + + radeon_mutex_lock(&rdev->cs_mutex); + radeon_vm_unbind_locked(rdev, vm); + radeon_mutex_unlock(&rdev->cs_mutex); + + /* remove all bo */ + r = radeon_bo_reserve(rdev->ib_pool.sa_manager.bo, false); + if (!r) { + bo_va = radeon_bo_va(rdev->ib_pool.sa_manager.bo, vm); + list_del_init(&bo_va->bo_list); + list_del_init(&bo_va->vm_list); + radeon_bo_unreserve(rdev->ib_pool.sa_manager.bo); + kfree(bo_va); + } + if (!list_empty(&vm->va)) { + dev_err(rdev->dev, "still active bo inside vm\n"); + } + list_for_each_entry_safe(bo_va, tmp, &vm->va, vm_list) { + list_del_init(&bo_va->vm_list); + r = radeon_bo_reserve(bo_va->bo, false); + if (!r) { + list_del_init(&bo_va->bo_list); + radeon_bo_unreserve(bo_va->bo); + kfree(bo_va); + } + } + mutex_unlock(&vm->mutex); +} diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index aa1ca2dea42f..7337850af2fa 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -142,6 +142,44 @@ void radeon_gem_fini(struct radeon_device *rdev) radeon_bo_force_delete(rdev); } +/* + * Call from drm_gem_handle_create which appear in both new and open ioctl + * case. + */ +int radeon_gem_object_open(struct drm_gem_object *obj, struct drm_file *file_priv) +{ + return 0; +} + +void radeon_gem_object_close(struct drm_gem_object *obj, + struct drm_file *file_priv) +{ + struct radeon_bo *rbo = gem_to_radeon_bo(obj); + struct radeon_device *rdev = rbo->rdev; + struct radeon_fpriv *fpriv = file_priv->driver_priv; + struct radeon_vm *vm = &fpriv->vm; + struct radeon_bo_va *bo_va, *tmp; + + if (rdev->family < CHIP_CAYMAN) { + return; + } + + if (radeon_bo_reserve(rbo, false)) { + return; + } + list_for_each_entry_safe(bo_va, tmp, &rbo->va, bo_list) { + if (bo_va->vm == vm) { + /* remove from this vm address space */ + mutex_lock(&vm->mutex); + list_del(&bo_va->vm_list); + mutex_unlock(&vm->mutex); + list_del(&bo_va->bo_list); + kfree(bo_va); + } + } + radeon_bo_unreserve(rbo); +} + /* * GEM ioctls. @@ -152,6 +190,7 @@ int radeon_gem_info_ioctl(struct drm_device *dev, void *data, struct radeon_device *rdev = dev->dev_private; struct drm_radeon_gem_info *args = data; struct ttm_mem_type_manager *man; + unsigned i; man = &rdev->mman.bdev.man[TTM_PL_VRAM]; @@ -160,8 +199,9 @@ int radeon_gem_info_ioctl(struct drm_device *dev, void *data, if (rdev->stollen_vga_memory) args->vram_visible -= radeon_bo_size(rdev->stollen_vga_memory); args->vram_visible -= radeon_fbdev_total_size(rdev); - args->gart_size = rdev->mc.gtt_size - rdev->cp.ring_size - 4096 - - RADEON_IB_POOL_SIZE*64*1024; + args->gart_size = rdev->mc.gtt_size - 4096 - RADEON_IB_POOL_SIZE*64*1024; + for(i = 0; i < RADEON_NUM_RINGS; ++i) + args->gart_size -= rdev->ring[i].ring_size; return 0; } @@ -352,6 +392,109 @@ out: return r; } +int radeon_gem_va_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp) +{ + struct drm_radeon_gem_va *args = data; + struct drm_gem_object *gobj; + struct radeon_device *rdev = dev->dev_private; + struct radeon_fpriv *fpriv = filp->driver_priv; + struct radeon_bo *rbo; + struct radeon_bo_va *bo_va; + u32 invalid_flags; + int r = 0; + + if (!rdev->vm_manager.enabled) { + args->operation = RADEON_VA_RESULT_ERROR; + return -ENOTTY; + } + + /* !! DONT REMOVE !! + * We don't support vm_id yet, to be sure we don't have have broken + * userspace, reject anyone trying to use non 0 value thus moving + * forward we can use those fields without breaking existant userspace + */ + if (args->vm_id) { + args->operation = RADEON_VA_RESULT_ERROR; + return -EINVAL; + } + + if (args->offset < RADEON_VA_RESERVED_SIZE) { + dev_err(&dev->pdev->dev, + "offset 0x%lX is in reserved area 0x%X\n", + (unsigned long)args->offset, + RADEON_VA_RESERVED_SIZE); + args->operation = RADEON_VA_RESULT_ERROR; + return -EINVAL; + } + + /* don't remove, we need to enforce userspace to set the snooped flag + * otherwise we will endup with broken userspace and we won't be able + * to enable this feature without adding new interface + */ + invalid_flags = RADEON_VM_PAGE_VALID | RADEON_VM_PAGE_SYSTEM; + if ((args->flags & invalid_flags)) { + dev_err(&dev->pdev->dev, "invalid flags 0x%08X vs 0x%08X\n", + args->flags, invalid_flags); + args->operation = RADEON_VA_RESULT_ERROR; + return -EINVAL; + } + if (!(args->flags & RADEON_VM_PAGE_SNOOPED)) { + dev_err(&dev->pdev->dev, "only supported snooped mapping for now\n"); + args->operation = RADEON_VA_RESULT_ERROR; + return -EINVAL; + } + + switch (args->operation) { + case RADEON_VA_MAP: + case RADEON_VA_UNMAP: + break; + default: + dev_err(&dev->pdev->dev, "unsupported operation %d\n", + args->operation); + args->operation = RADEON_VA_RESULT_ERROR; + return -EINVAL; + } + + gobj = drm_gem_object_lookup(dev, filp, args->handle); + if (gobj == NULL) { + args->operation = RADEON_VA_RESULT_ERROR; + return -ENOENT; + } + rbo = gem_to_radeon_bo(gobj); + r = radeon_bo_reserve(rbo, false); + if (r) { + args->operation = RADEON_VA_RESULT_ERROR; + drm_gem_object_unreference_unlocked(gobj); + return r; + } + switch (args->operation) { + case RADEON_VA_MAP: + bo_va = radeon_bo_va(rbo, &fpriv->vm); + if (bo_va) { + args->operation = RADEON_VA_RESULT_VA_EXIST; + args->offset = bo_va->soffset; + goto out; + } + r = radeon_vm_bo_add(rdev, &fpriv->vm, rbo, + args->offset, args->flags); + break; + case RADEON_VA_UNMAP: + r = radeon_vm_bo_rmv(rdev, &fpriv->vm, rbo); + break; + default: + break; + } + args->operation = RADEON_VA_RESULT_OK; + if (r) { + args->operation = RADEON_VA_RESULT_ERROR; + } +out: + radeon_bo_unreserve(rbo); + drm_gem_object_unreference_unlocked(gobj); + return r; +} + int radeon_mode_dumb_create(struct drm_file *file_priv, struct drm_device *dev, struct drm_mode_create_dumb *args) diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c index 8f86aeb26693..be38921bf761 100644 --- a/drivers/gpu/drm/radeon/radeon_irq_kms.c +++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c @@ -65,7 +65,8 @@ void radeon_driver_irq_preinstall_kms(struct drm_device *dev) unsigned i; /* Disable *all* interrupts */ - rdev->irq.sw_int = false; + for (i = 0; i < RADEON_NUM_RINGS; i++) + rdev->irq.sw_int[i] = false; rdev->irq.gui_idle = false; for (i = 0; i < RADEON_MAX_HPD_PINS; i++) rdev->irq.hpd[i] = false; @@ -81,9 +82,11 @@ void radeon_driver_irq_preinstall_kms(struct drm_device *dev) int radeon_driver_irq_postinstall_kms(struct drm_device *dev) { struct radeon_device *rdev = dev->dev_private; + unsigned i; dev->max_vblank_count = 0x001fffff; - rdev->irq.sw_int = true; + for (i = 0; i < RADEON_NUM_RINGS; i++) + rdev->irq.sw_int[i] = true; radeon_irq_set(rdev); return 0; } @@ -97,7 +100,8 @@ void radeon_driver_irq_uninstall_kms(struct drm_device *dev) return; } /* Disable *all* interrupts */ - rdev->irq.sw_int = false; + for (i = 0; i < RADEON_NUM_RINGS; i++) + rdev->irq.sw_int[i] = false; rdev->irq.gui_idle = false; for (i = 0; i < RADEON_MAX_HPD_PINS; i++) rdev->irq.hpd[i] = false; @@ -194,26 +198,26 @@ void radeon_irq_kms_fini(struct radeon_device *rdev) flush_work_sync(&rdev->hotplug_work); } -void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev) +void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring) { unsigned long irqflags; spin_lock_irqsave(&rdev->irq.sw_lock, irqflags); - if (rdev->ddev->irq_enabled && (++rdev->irq.sw_refcount == 1)) { - rdev->irq.sw_int = true; + if (rdev->ddev->irq_enabled && (++rdev->irq.sw_refcount[ring] == 1)) { + rdev->irq.sw_int[ring] = true; radeon_irq_set(rdev); } spin_unlock_irqrestore(&rdev->irq.sw_lock, irqflags); } -void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev) +void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev, int ring) { unsigned long irqflags; spin_lock_irqsave(&rdev->irq.sw_lock, irqflags); - BUG_ON(rdev->ddev->irq_enabled && rdev->irq.sw_refcount <= 0); - if (rdev->ddev->irq_enabled && (--rdev->irq.sw_refcount == 0)) { - rdev->irq.sw_int = false; + BUG_ON(rdev->ddev->irq_enabled && rdev->irq.sw_refcount[ring] <= 0); + if (rdev->ddev->irq_enabled && (--rdev->irq.sw_refcount[ring] == 0)) { + rdev->irq.sw_int[ring] = false; radeon_irq_set(rdev); } spin_unlock_irqrestore(&rdev->irq.sw_lock, irqflags); diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index be2c1224e68a..d3352889a870 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -250,6 +250,18 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) return -EINVAL; } break; + case RADEON_INFO_VA_START: + /* this is where we report if vm is supported or not */ + if (rdev->family < CHIP_CAYMAN) + return -EINVAL; + value = RADEON_VA_RESERVED_SIZE; + break; + case RADEON_INFO_IB_VM_MAX_SIZE: + /* this is where we report if vm is supported or not */ + if (rdev->family < CHIP_CAYMAN) + return -EINVAL; + value = RADEON_IB_VM_MAX_SIZE; + break; default: DRM_DEBUG_KMS("Invalid request %d\n", info->request); return -EINVAL; @@ -270,7 +282,6 @@ int radeon_driver_firstopen_kms(struct drm_device *dev) return 0; } - void radeon_driver_lastclose_kms(struct drm_device *dev) { vga_switcheroo_process_delayed_switch(); @@ -278,12 +289,45 @@ void radeon_driver_lastclose_kms(struct drm_device *dev) int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) { + struct radeon_device *rdev = dev->dev_private; + + file_priv->driver_priv = NULL; + + /* new gpu have virtual address space support */ + if (rdev->family >= CHIP_CAYMAN) { + struct radeon_fpriv *fpriv; + int r; + + fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL); + if (unlikely(!fpriv)) { + return -ENOMEM; + } + + r = radeon_vm_init(rdev, &fpriv->vm); + if (r) { + radeon_vm_fini(rdev, &fpriv->vm); + kfree(fpriv); + return r; + } + + file_priv->driver_priv = fpriv; + } return 0; } void radeon_driver_postclose_kms(struct drm_device *dev, struct drm_file *file_priv) { + struct radeon_device *rdev = dev->dev_private; + + /* new gpu have virtual address space support */ + if (rdev->family >= CHIP_CAYMAN && file_priv->driver_priv) { + struct radeon_fpriv *fpriv = file_priv->driver_priv; + + radeon_vm_fini(rdev, &fpriv->vm); + kfree(fpriv); + file_priv->driver_priv = NULL; + } } void radeon_driver_preclose_kms(struct drm_device *dev, @@ -451,5 +495,6 @@ struct drm_ioctl_desc radeon_ioctls_kms[] = { DRM_IOCTL_DEF_DRV(RADEON_GEM_SET_TILING, radeon_gem_set_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED), DRM_IOCTL_DEF_DRV(RADEON_GEM_GET_TILING, radeon_gem_get_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED), DRM_IOCTL_DEF_DRV(RADEON_GEM_BUSY, radeon_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(RADEON_GEM_VA, radeon_gem_va_ioctl, DRM_AUTH|DRM_UNLOCKED), }; int radeon_max_kms_ioctl = DRM_ARRAY_SIZE(radeon_ioctls_kms); diff --git a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c index daadf2111040..25a19c483075 100644 --- a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c +++ b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c @@ -437,7 +437,7 @@ int radeon_crtc_do_set_base(struct drm_crtc *crtc, crtc_offset_cntl = 0; - pitch_pixels = target_fb->pitch / (target_fb->bits_per_pixel / 8); + pitch_pixels = target_fb->pitches[0] / (target_fb->bits_per_pixel / 8); crtc_pitch = (((pitch_pixels * target_fb->bits_per_pixel) + ((target_fb->bits_per_pixel * 8) - 1)) / (target_fb->bits_per_pixel * 8)); diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index 2c2e75ef8a37..08ff857c8fd6 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -643,7 +643,7 @@ extern void radeon_crtc_fb_gamma_get(struct drm_crtc *crtc, u16 *red, u16 *green u16 *blue, int regno); void radeon_framebuffer_init(struct drm_device *dev, struct radeon_framebuffer *rfb, - struct drm_mode_fb_cmd *mode_cmd, + struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object *obj); int radeonfb_remove(struct drm_device *dev, struct drm_framebuffer *fb); diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 1c851521f458..d45df1763598 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -46,6 +46,20 @@ static void radeon_bo_clear_surface_reg(struct radeon_bo *bo); * function are calling it. */ +void radeon_bo_clear_va(struct radeon_bo *bo) +{ + struct radeon_bo_va *bo_va, *tmp; + + list_for_each_entry_safe(bo_va, tmp, &bo->va, bo_list) { + /* remove from all vm address space */ + mutex_lock(&bo_va->vm->mutex); + list_del(&bo_va->vm_list); + mutex_unlock(&bo_va->vm->mutex); + list_del(&bo_va->bo_list); + kfree(bo_va); + } +} + static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo) { struct radeon_bo *bo; @@ -55,6 +69,7 @@ static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo) list_del_init(&bo->list); mutex_unlock(&bo->rdev->gem.mutex); radeon_bo_clear_surface_reg(bo); + radeon_bo_clear_va(bo); drm_gem_object_release(&bo->gem_base); kfree(bo); } @@ -95,6 +110,7 @@ int radeon_bo_create(struct radeon_device *rdev, enum ttm_bo_type type; unsigned long page_align = roundup(byte_align, PAGE_SIZE) >> PAGE_SHIFT; unsigned long max_size = 0; + size_t acc_size; int r; size = ALIGN(size, PAGE_SIZE); @@ -117,6 +133,9 @@ int radeon_bo_create(struct radeon_device *rdev, return -ENOMEM; } + acc_size = ttm_bo_dma_acc_size(&rdev->mman.bdev, size, + sizeof(struct radeon_bo)); + retry: bo = kzalloc(sizeof(struct radeon_bo), GFP_KERNEL); if (bo == NULL) @@ -130,12 +149,13 @@ retry: bo->gem_base.driver_private = NULL; bo->surface_reg = -1; INIT_LIST_HEAD(&bo->list); + INIT_LIST_HEAD(&bo->va); radeon_ttm_placement_from_domain(bo, domain); /* Kernel allocation are uninterruptible */ mutex_lock(&rdev->vram_mutex); r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type, - &bo->placement, page_align, 0, !kernel, NULL, size, - &radeon_ttm_bo_destroy); + &bo->placement, page_align, 0, !kernel, NULL, + acc_size, &radeon_ttm_bo_destroy); mutex_unlock(&rdev->vram_mutex); if (unlikely(r != 0)) { if (r != -ERESTARTSYS) { @@ -483,6 +503,7 @@ void radeon_bo_move_notify(struct ttm_buffer_object *bo, return; rbo = container_of(bo, struct radeon_bo, tbo); radeon_bo_check_tiling(rbo, 0, 1); + radeon_vm_bo_invalidate(rbo->rdev, rbo); } int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo) @@ -556,3 +577,16 @@ int radeon_bo_reserve(struct radeon_bo *bo, bool no_wait) } return 0; } + +/* object have to be reserved */ +struct radeon_bo_va *radeon_bo_va(struct radeon_bo *rbo, struct radeon_vm *vm) +{ + struct radeon_bo_va *bo_va; + + list_for_each_entry(bo_va, &rbo->va, bo_list) { + if (bo_va->vm == vm) { + return bo_va; + } + } + return NULL; +} diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h index b07f0f9b8627..cde430308870 100644 --- a/drivers/gpu/drm/radeon/radeon_object.h +++ b/drivers/gpu/drm/radeon/radeon_object.h @@ -83,6 +83,16 @@ static inline bool radeon_bo_is_reserved(struct radeon_bo *bo) return !!atomic_read(&bo->tbo.reserved); } +static inline unsigned radeon_bo_ngpu_pages(struct radeon_bo *bo) +{ + return (bo->tbo.num_pages << PAGE_SHIFT) / RADEON_GPU_PAGE_SIZE; +} + +static inline unsigned radeon_bo_gpu_page_alignment(struct radeon_bo *bo) +{ + return (bo->tbo.mem.page_alignment << PAGE_SHIFT) / RADEON_GPU_PAGE_SIZE; +} + /** * radeon_bo_mmap_offset - return mmap offset of bo * @bo: radeon object for which we query the offset @@ -128,4 +138,26 @@ extern void radeon_bo_move_notify(struct ttm_buffer_object *bo, struct ttm_mem_reg *mem); extern int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo); extern int radeon_bo_get_surface_reg(struct radeon_bo *bo); +extern struct radeon_bo_va *radeon_bo_va(struct radeon_bo *rbo, + struct radeon_vm *vm); + +/* + * sub allocation + */ +extern int radeon_sa_bo_manager_init(struct radeon_device *rdev, + struct radeon_sa_manager *sa_manager, + unsigned size, u32 domain); +extern void radeon_sa_bo_manager_fini(struct radeon_device *rdev, + struct radeon_sa_manager *sa_manager); +extern int radeon_sa_bo_manager_start(struct radeon_device *rdev, + struct radeon_sa_manager *sa_manager); +extern int radeon_sa_bo_manager_suspend(struct radeon_device *rdev, + struct radeon_sa_manager *sa_manager); +extern int radeon_sa_bo_new(struct radeon_device *rdev, + struct radeon_sa_manager *sa_manager, + struct radeon_sa_bo *sa_bo, + unsigned size, unsigned align); +extern void radeon_sa_bo_free(struct radeon_device *rdev, + struct radeon_sa_bo *sa_bo); + #endif diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index 78a665bd9519..095148e29a1f 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -252,7 +252,10 @@ static void radeon_pm_set_clocks(struct radeon_device *rdev) mutex_lock(&rdev->ddev->struct_mutex); mutex_lock(&rdev->vram_mutex); - mutex_lock(&rdev->cp.mutex); + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + if (rdev->ring[i].ring_obj) + mutex_lock(&rdev->ring[i].mutex); + } /* gui idle int has issues on older chips it seems */ if (rdev->family >= CHIP_R600) { @@ -268,12 +271,13 @@ static void radeon_pm_set_clocks(struct radeon_device *rdev) radeon_irq_set(rdev); } } else { - if (rdev->cp.ready) { + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; + if (ring->ready) { struct radeon_fence *fence; - radeon_ring_alloc(rdev, 64); - radeon_fence_create(rdev, &fence); + radeon_ring_alloc(rdev, ring, 64); + radeon_fence_create(rdev, &fence, radeon_ring_index(rdev, ring)); radeon_fence_emit(rdev, fence); - radeon_ring_commit(rdev); + radeon_ring_commit(rdev, ring); radeon_fence_wait(fence, false); radeon_fence_unref(&fence); } @@ -307,7 +311,10 @@ static void radeon_pm_set_clocks(struct radeon_device *rdev) rdev->pm.dynpm_planned_action = DYNPM_ACTION_NONE; - mutex_unlock(&rdev->cp.mutex); + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + if (rdev->ring[i].ring_obj) + mutex_unlock(&rdev->ring[i].mutex); + } mutex_unlock(&rdev->vram_mutex); mutex_unlock(&rdev->ddev->struct_mutex); } @@ -795,19 +802,14 @@ static void radeon_dynpm_idle_work_handler(struct work_struct *work) resched = ttm_bo_lock_delayed_workqueue(&rdev->mman.bdev); mutex_lock(&rdev->pm.mutex); if (rdev->pm.dynpm_state == DYNPM_STATE_ACTIVE) { - unsigned long irq_flags; int not_processed = 0; + int i; - read_lock_irqsave(&rdev->fence_drv.lock, irq_flags); - if (!list_empty(&rdev->fence_drv.emited)) { - struct list_head *ptr; - list_for_each(ptr, &rdev->fence_drv.emited) { - /* count up to 3, that's enought info */ - if (++not_processed >= 3) - break; - } + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + not_processed += radeon_fence_count_emitted(rdev, i); + if (not_processed >= 3) + break; } - read_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags); if (not_processed >= 3) { /* should upclock */ if (rdev->pm.dynpm_planned_action == DYNPM_ACTION_DOWNCLOCK) { diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 49d58202202c..e8bc70933d1b 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -34,6 +34,7 @@ #include "atom.h" int radeon_debugfs_ib_init(struct radeon_device *rdev); +int radeon_debugfs_ring_init(struct radeon_device *rdev); u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx) { @@ -60,105 +61,106 @@ u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx) return idx_value; } -void radeon_ring_write(struct radeon_device *rdev, uint32_t v) +void radeon_ring_write(struct radeon_ring *ring, uint32_t v) { #if DRM_DEBUG_CODE - if (rdev->cp.count_dw <= 0) { + if (ring->count_dw <= 0) { DRM_ERROR("radeon: writting more dword to ring than expected !\n"); } #endif - rdev->cp.ring[rdev->cp.wptr++] = v; - rdev->cp.wptr &= rdev->cp.ptr_mask; - rdev->cp.count_dw--; - rdev->cp.ring_free_dw--; + ring->ring[ring->wptr++] = v; + ring->wptr &= ring->ptr_mask; + ring->count_dw--; + ring->ring_free_dw--; } -void radeon_ib_bogus_cleanup(struct radeon_device *rdev) -{ - struct radeon_ib *ib, *n; - - list_for_each_entry_safe(ib, n, &rdev->ib_pool.bogus_ib, list) { - list_del(&ib->list); - vfree(ib->ptr); - kfree(ib); - } -} - -void radeon_ib_bogus_add(struct radeon_device *rdev, struct radeon_ib *ib) +/* + * IB. + */ +bool radeon_ib_try_free(struct radeon_device *rdev, struct radeon_ib *ib) { - struct radeon_ib *bib; - - bib = kmalloc(sizeof(*bib), GFP_KERNEL); - if (bib == NULL) - return; - bib->ptr = vmalloc(ib->length_dw * 4); - if (bib->ptr == NULL) { - kfree(bib); - return; + bool done = false; + + /* only free ib which have been emited */ + if (ib->fence && ib->fence->emitted) { + if (radeon_fence_signaled(ib->fence)) { + radeon_fence_unref(&ib->fence); + radeon_sa_bo_free(rdev, &ib->sa_bo); + done = true; + } } - memcpy(bib->ptr, ib->ptr, ib->length_dw * 4); - bib->length_dw = ib->length_dw; - mutex_lock(&rdev->ib_pool.mutex); - list_add_tail(&bib->list, &rdev->ib_pool.bogus_ib); - mutex_unlock(&rdev->ib_pool.mutex); + return done; } -/* - * IB. - */ -int radeon_ib_get(struct radeon_device *rdev, struct radeon_ib **ib) +int radeon_ib_get(struct radeon_device *rdev, int ring, + struct radeon_ib **ib, unsigned size) { struct radeon_fence *fence; - struct radeon_ib *nib; - int r = 0, i, c; + unsigned cretry = 0; + int r = 0, i, idx; *ib = NULL; - r = radeon_fence_create(rdev, &fence); + /* align size on 256 bytes */ + size = ALIGN(size, 256); + + r = radeon_fence_create(rdev, &fence, ring); if (r) { dev_err(rdev->dev, "failed to create fence for new IB\n"); return r; } + mutex_lock(&rdev->ib_pool.mutex); - for (i = rdev->ib_pool.head_id, c = 0, nib = NULL; c < RADEON_IB_POOL_SIZE; c++, i++) { - i &= (RADEON_IB_POOL_SIZE - 1); - if (rdev->ib_pool.ibs[i].free) { - nib = &rdev->ib_pool.ibs[i]; - break; - } - } - if (nib == NULL) { - /* This should never happen, it means we allocated all - * IB and haven't scheduled one yet, return EBUSY to - * userspace hoping that on ioctl recall we get better - * luck - */ - dev_err(rdev->dev, "no free indirect buffer !\n"); + idx = rdev->ib_pool.head_id; +retry: + if (cretry > 5) { + dev_err(rdev->dev, "failed to get an ib after 5 retry\n"); mutex_unlock(&rdev->ib_pool.mutex); radeon_fence_unref(&fence); - return -EBUSY; + return -ENOMEM; } - rdev->ib_pool.head_id = (nib->idx + 1) & (RADEON_IB_POOL_SIZE - 1); - nib->free = false; - if (nib->fence) { - mutex_unlock(&rdev->ib_pool.mutex); - r = radeon_fence_wait(nib->fence, false); - if (r) { - dev_err(rdev->dev, "error waiting fence of IB(%u:0x%016lX:%u)\n", - nib->idx, (unsigned long)nib->gpu_addr, nib->length_dw); - mutex_lock(&rdev->ib_pool.mutex); - nib->free = true; - mutex_unlock(&rdev->ib_pool.mutex); - radeon_fence_unref(&fence); - return r; + cretry++; + for (i = 0; i < RADEON_IB_POOL_SIZE; i++) { + radeon_ib_try_free(rdev, &rdev->ib_pool.ibs[idx]); + if (rdev->ib_pool.ibs[idx].fence == NULL) { + r = radeon_sa_bo_new(rdev, &rdev->ib_pool.sa_manager, + &rdev->ib_pool.ibs[idx].sa_bo, + size, 256); + if (!r) { + *ib = &rdev->ib_pool.ibs[idx]; + (*ib)->ptr = rdev->ib_pool.sa_manager.cpu_ptr; + (*ib)->ptr += ((*ib)->sa_bo.offset >> 2); + (*ib)->gpu_addr = rdev->ib_pool.sa_manager.gpu_addr; + (*ib)->gpu_addr += (*ib)->sa_bo.offset; + (*ib)->fence = fence; + (*ib)->vm_id = 0; + /* ib are most likely to be allocated in a ring fashion + * thus rdev->ib_pool.head_id should be the id of the + * oldest ib + */ + rdev->ib_pool.head_id = (1 + idx); + rdev->ib_pool.head_id &= (RADEON_IB_POOL_SIZE - 1); + mutex_unlock(&rdev->ib_pool.mutex); + return 0; + } } - mutex_lock(&rdev->ib_pool.mutex); + idx = (idx + 1) & (RADEON_IB_POOL_SIZE - 1); + } + /* this should be rare event, ie all ib scheduled none signaled yet. + */ + for (i = 0; i < RADEON_IB_POOL_SIZE; i++) { + if (rdev->ib_pool.ibs[idx].fence && rdev->ib_pool.ibs[idx].fence->emitted) { + r = radeon_fence_wait(rdev->ib_pool.ibs[idx].fence, false); + if (!r) { + goto retry; + } + /* an error happened */ + break; + } + idx = (idx + 1) & (RADEON_IB_POOL_SIZE - 1); } - radeon_fence_unref(&nib->fence); - nib->fence = fence; - nib->length_dw = 0; mutex_unlock(&rdev->ib_pool.mutex); - *ib = nib; - return 0; + radeon_fence_unref(&fence); + return r; } void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib) @@ -169,247 +171,255 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib) if (tmp == NULL) { return; } - if (!tmp->fence->emited) - radeon_fence_unref(&tmp->fence); mutex_lock(&rdev->ib_pool.mutex); - tmp->free = true; + if (tmp->fence && !tmp->fence->emitted) { + radeon_sa_bo_free(rdev, &tmp->sa_bo); + radeon_fence_unref(&tmp->fence); + } mutex_unlock(&rdev->ib_pool.mutex); } int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib) { + struct radeon_ring *ring = &rdev->ring[ib->fence->ring]; int r = 0; - if (!ib->length_dw || !rdev->cp.ready) { + if (!ib->length_dw || !ring->ready) { /* TODO: Nothings in the ib we should report. */ DRM_ERROR("radeon: couldn't schedule IB(%u).\n", ib->idx); return -EINVAL; } /* 64 dwords should be enough for fence too */ - r = radeon_ring_lock(rdev, 64); + r = radeon_ring_lock(rdev, ring, 64); if (r) { DRM_ERROR("radeon: scheduling IB failed (%d).\n", r); return r; } - radeon_ring_ib_execute(rdev, ib); + radeon_ring_ib_execute(rdev, ib->fence->ring, ib); radeon_fence_emit(rdev, ib->fence); - mutex_lock(&rdev->ib_pool.mutex); - /* once scheduled IB is considered free and protected by the fence */ - ib->free = true; - mutex_unlock(&rdev->ib_pool.mutex); - radeon_ring_unlock_commit(rdev); + radeon_ring_unlock_commit(rdev, ring); return 0; } int radeon_ib_pool_init(struct radeon_device *rdev) { - void *ptr; - uint64_t gpu_addr; - int i; - int r = 0; + int i, r; - if (rdev->ib_pool.robj) + mutex_lock(&rdev->ib_pool.mutex); + if (rdev->ib_pool.ready) { + mutex_unlock(&rdev->ib_pool.mutex); return 0; - INIT_LIST_HEAD(&rdev->ib_pool.bogus_ib); - /* Allocate 1M object buffer */ - r = radeon_bo_create(rdev, RADEON_IB_POOL_SIZE*64*1024, - PAGE_SIZE, true, RADEON_GEM_DOMAIN_GTT, - &rdev->ib_pool.robj); - if (r) { - DRM_ERROR("radeon: failed to ib pool (%d).\n", r); - return r; } - r = radeon_bo_reserve(rdev->ib_pool.robj, false); - if (unlikely(r != 0)) - return r; - r = radeon_bo_pin(rdev->ib_pool.robj, RADEON_GEM_DOMAIN_GTT, &gpu_addr); - if (r) { - radeon_bo_unreserve(rdev->ib_pool.robj); - DRM_ERROR("radeon: failed to pin ib pool (%d).\n", r); - return r; - } - r = radeon_bo_kmap(rdev->ib_pool.robj, &ptr); - radeon_bo_unreserve(rdev->ib_pool.robj); + + r = radeon_sa_bo_manager_init(rdev, &rdev->ib_pool.sa_manager, + RADEON_IB_POOL_SIZE*64*1024, + RADEON_GEM_DOMAIN_GTT); if (r) { - DRM_ERROR("radeon: failed to map ib pool (%d).\n", r); + mutex_unlock(&rdev->ib_pool.mutex); return r; } - for (i = 0; i < RADEON_IB_POOL_SIZE; i++) { - unsigned offset; - offset = i * 64 * 1024; - rdev->ib_pool.ibs[i].gpu_addr = gpu_addr + offset; - rdev->ib_pool.ibs[i].ptr = ptr + offset; + for (i = 0; i < RADEON_IB_POOL_SIZE; i++) { + rdev->ib_pool.ibs[i].fence = NULL; rdev->ib_pool.ibs[i].idx = i; rdev->ib_pool.ibs[i].length_dw = 0; - rdev->ib_pool.ibs[i].free = true; + INIT_LIST_HEAD(&rdev->ib_pool.ibs[i].sa_bo.list); } rdev->ib_pool.head_id = 0; rdev->ib_pool.ready = true; DRM_INFO("radeon: ib pool ready.\n"); + if (radeon_debugfs_ib_init(rdev)) { DRM_ERROR("Failed to register debugfs file for IB !\n"); } - return r; + if (radeon_debugfs_ring_init(rdev)) { + DRM_ERROR("Failed to register debugfs file for rings !\n"); + } + mutex_unlock(&rdev->ib_pool.mutex); + return 0; } void radeon_ib_pool_fini(struct radeon_device *rdev) { - int r; - struct radeon_bo *robj; + unsigned i; - if (!rdev->ib_pool.ready) { - return; - } mutex_lock(&rdev->ib_pool.mutex); - radeon_ib_bogus_cleanup(rdev); - robj = rdev->ib_pool.robj; - rdev->ib_pool.robj = NULL; - mutex_unlock(&rdev->ib_pool.mutex); - - if (robj) { - r = radeon_bo_reserve(robj, false); - if (likely(r == 0)) { - radeon_bo_kunmap(robj); - radeon_bo_unpin(robj); - radeon_bo_unreserve(robj); + if (rdev->ib_pool.ready) { + for (i = 0; i < RADEON_IB_POOL_SIZE; i++) { + radeon_sa_bo_free(rdev, &rdev->ib_pool.ibs[i].sa_bo); + radeon_fence_unref(&rdev->ib_pool.ibs[i].fence); } - radeon_bo_unref(&robj); + radeon_sa_bo_manager_fini(rdev, &rdev->ib_pool.sa_manager); + rdev->ib_pool.ready = false; } + mutex_unlock(&rdev->ib_pool.mutex); } +int radeon_ib_pool_start(struct radeon_device *rdev) +{ + return radeon_sa_bo_manager_start(rdev, &rdev->ib_pool.sa_manager); +} + +int radeon_ib_pool_suspend(struct radeon_device *rdev) +{ + return radeon_sa_bo_manager_suspend(rdev, &rdev->ib_pool.sa_manager); +} /* * Ring. */ -void radeon_ring_free_size(struct radeon_device *rdev) +int radeon_ring_index(struct radeon_device *rdev, struct radeon_ring *ring) { - if (rdev->wb.enabled) - rdev->cp.rptr = le32_to_cpu(rdev->wb.wb[RADEON_WB_CP_RPTR_OFFSET/4]); - else { - if (rdev->family >= CHIP_R600) - rdev->cp.rptr = RREG32(R600_CP_RB_RPTR); - else - rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR); + /* r1xx-r5xx only has CP ring */ + if (rdev->family < CHIP_R600) + return RADEON_RING_TYPE_GFX_INDEX; + + if (rdev->family >= CHIP_CAYMAN) { + if (ring == &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]) + return CAYMAN_RING_TYPE_CP1_INDEX; + else if (ring == &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]) + return CAYMAN_RING_TYPE_CP2_INDEX; } + return RADEON_RING_TYPE_GFX_INDEX; +} + +void radeon_ring_free_size(struct radeon_device *rdev, struct radeon_ring *ring) +{ + u32 rptr; + + if (rdev->wb.enabled) + rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]); + else + rptr = RREG32(ring->rptr_reg); + ring->rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift; /* This works because ring_size is a power of 2 */ - rdev->cp.ring_free_dw = (rdev->cp.rptr + (rdev->cp.ring_size / 4)); - rdev->cp.ring_free_dw -= rdev->cp.wptr; - rdev->cp.ring_free_dw &= rdev->cp.ptr_mask; - if (!rdev->cp.ring_free_dw) { - rdev->cp.ring_free_dw = rdev->cp.ring_size / 4; + ring->ring_free_dw = (ring->rptr + (ring->ring_size / 4)); + ring->ring_free_dw -= ring->wptr; + ring->ring_free_dw &= ring->ptr_mask; + if (!ring->ring_free_dw) { + ring->ring_free_dw = ring->ring_size / 4; } } -int radeon_ring_alloc(struct radeon_device *rdev, unsigned ndw) + +int radeon_ring_alloc(struct radeon_device *rdev, struct radeon_ring *ring, unsigned ndw) { int r; /* Align requested size with padding so unlock_commit can * pad safely */ - ndw = (ndw + rdev->cp.align_mask) & ~rdev->cp.align_mask; - while (ndw > (rdev->cp.ring_free_dw - 1)) { - radeon_ring_free_size(rdev); - if (ndw < rdev->cp.ring_free_dw) { + ndw = (ndw + ring->align_mask) & ~ring->align_mask; + while (ndw > (ring->ring_free_dw - 1)) { + radeon_ring_free_size(rdev, ring); + if (ndw < ring->ring_free_dw) { break; } - r = radeon_fence_wait_next(rdev); + r = radeon_fence_wait_next(rdev, radeon_ring_index(rdev, ring)); if (r) return r; } - rdev->cp.count_dw = ndw; - rdev->cp.wptr_old = rdev->cp.wptr; + ring->count_dw = ndw; + ring->wptr_old = ring->wptr; return 0; } -int radeon_ring_lock(struct radeon_device *rdev, unsigned ndw) +int radeon_ring_lock(struct radeon_device *rdev, struct radeon_ring *ring, unsigned ndw) { int r; - mutex_lock(&rdev->cp.mutex); - r = radeon_ring_alloc(rdev, ndw); + mutex_lock(&ring->mutex); + r = radeon_ring_alloc(rdev, ring, ndw); if (r) { - mutex_unlock(&rdev->cp.mutex); + mutex_unlock(&ring->mutex); return r; } return 0; } -void radeon_ring_commit(struct radeon_device *rdev) +void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *ring) { unsigned count_dw_pad; unsigned i; /* We pad to match fetch size */ - count_dw_pad = (rdev->cp.align_mask + 1) - - (rdev->cp.wptr & rdev->cp.align_mask); + count_dw_pad = (ring->align_mask + 1) - + (ring->wptr & ring->align_mask); for (i = 0; i < count_dw_pad; i++) { - radeon_ring_write(rdev, 2 << 30); + radeon_ring_write(ring, ring->nop); } DRM_MEMORYBARRIER(); - radeon_cp_commit(rdev); + WREG32(ring->wptr_reg, (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask); + (void)RREG32(ring->wptr_reg); } -void radeon_ring_unlock_commit(struct radeon_device *rdev) +void radeon_ring_unlock_commit(struct radeon_device *rdev, struct radeon_ring *ring) { - radeon_ring_commit(rdev); - mutex_unlock(&rdev->cp.mutex); + radeon_ring_commit(rdev, ring); + mutex_unlock(&ring->mutex); } -void radeon_ring_unlock_undo(struct radeon_device *rdev) +void radeon_ring_unlock_undo(struct radeon_device *rdev, struct radeon_ring *ring) { - rdev->cp.wptr = rdev->cp.wptr_old; - mutex_unlock(&rdev->cp.mutex); + ring->wptr = ring->wptr_old; + mutex_unlock(&ring->mutex); } -int radeon_ring_init(struct radeon_device *rdev, unsigned ring_size) +int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsigned ring_size, + unsigned rptr_offs, unsigned rptr_reg, unsigned wptr_reg, + u32 ptr_reg_shift, u32 ptr_reg_mask, u32 nop) { int r; - rdev->cp.ring_size = ring_size; + ring->ring_size = ring_size; + ring->rptr_offs = rptr_offs; + ring->rptr_reg = rptr_reg; + ring->wptr_reg = wptr_reg; + ring->ptr_reg_shift = ptr_reg_shift; + ring->ptr_reg_mask = ptr_reg_mask; + ring->nop = nop; /* Allocate ring buffer */ - if (rdev->cp.ring_obj == NULL) { - r = radeon_bo_create(rdev, rdev->cp.ring_size, PAGE_SIZE, true, + if (ring->ring_obj == NULL) { + r = radeon_bo_create(rdev, ring->ring_size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_GTT, - &rdev->cp.ring_obj); + &ring->ring_obj); if (r) { dev_err(rdev->dev, "(%d) ring create failed\n", r); return r; } - r = radeon_bo_reserve(rdev->cp.ring_obj, false); + r = radeon_bo_reserve(ring->ring_obj, false); if (unlikely(r != 0)) return r; - r = radeon_bo_pin(rdev->cp.ring_obj, RADEON_GEM_DOMAIN_GTT, - &rdev->cp.gpu_addr); + r = radeon_bo_pin(ring->ring_obj, RADEON_GEM_DOMAIN_GTT, + &ring->gpu_addr); if (r) { - radeon_bo_unreserve(rdev->cp.ring_obj); + radeon_bo_unreserve(ring->ring_obj); dev_err(rdev->dev, "(%d) ring pin failed\n", r); return r; } - r = radeon_bo_kmap(rdev->cp.ring_obj, - (void **)&rdev->cp.ring); - radeon_bo_unreserve(rdev->cp.ring_obj); + r = radeon_bo_kmap(ring->ring_obj, + (void **)&ring->ring); + radeon_bo_unreserve(ring->ring_obj); if (r) { dev_err(rdev->dev, "(%d) ring map failed\n", r); return r; } } - rdev->cp.ptr_mask = (rdev->cp.ring_size / 4) - 1; - rdev->cp.ring_free_dw = rdev->cp.ring_size / 4; + ring->ptr_mask = (ring->ring_size / 4) - 1; + ring->ring_free_dw = ring->ring_size / 4; return 0; } -void radeon_ring_fini(struct radeon_device *rdev) +void radeon_ring_fini(struct radeon_device *rdev, struct radeon_ring *ring) { int r; struct radeon_bo *ring_obj; - mutex_lock(&rdev->cp.mutex); - ring_obj = rdev->cp.ring_obj; - rdev->cp.ring = NULL; - rdev->cp.ring_obj = NULL; - mutex_unlock(&rdev->cp.mutex); + mutex_lock(&ring->mutex); + ring_obj = ring->ring_obj; + ring->ring = NULL; + ring->ring_obj = NULL; + mutex_unlock(&ring->mutex); if (ring_obj) { r = radeon_bo_reserve(ring_obj, false); @@ -422,72 +432,83 @@ void radeon_ring_fini(struct radeon_device *rdev) } } - /* * Debugfs info */ #if defined(CONFIG_DEBUG_FS) -static int radeon_debugfs_ib_info(struct seq_file *m, void *data) + +static int radeon_debugfs_ring_info(struct seq_file *m, void *data) { struct drm_info_node *node = (struct drm_info_node *) m->private; - struct radeon_ib *ib = node->info_ent->data; - unsigned i; - - if (ib == NULL) { - return 0; - } - seq_printf(m, "IB %04u\n", ib->idx); - seq_printf(m, "IB fence %p\n", ib->fence); - seq_printf(m, "IB size %05u dwords\n", ib->length_dw); - for (i = 0; i < ib->length_dw; i++) { - seq_printf(m, "[%05u]=0x%08X\n", i, ib->ptr[i]); + struct drm_device *dev = node->minor->dev; + struct radeon_device *rdev = dev->dev_private; + int ridx = *(int*)node->info_ent->data; + struct radeon_ring *ring = &rdev->ring[ridx]; + unsigned count, i, j; + + radeon_ring_free_size(rdev, ring); + count = (ring->ring_size / 4) - ring->ring_free_dw; + seq_printf(m, "wptr(0x%04x): 0x%08x\n", ring->wptr_reg, RREG32(ring->wptr_reg)); + seq_printf(m, "rptr(0x%04x): 0x%08x\n", ring->rptr_reg, RREG32(ring->rptr_reg)); + seq_printf(m, "driver's copy of the wptr: 0x%08x\n", ring->wptr); + seq_printf(m, "driver's copy of the rptr: 0x%08x\n", ring->rptr); + seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw); + seq_printf(m, "%u dwords in ring\n", count); + i = ring->rptr; + for (j = 0; j <= count; j++) { + seq_printf(m, "r[%04d]=0x%08x\n", i, ring->ring[i]); + i = (i + 1) & ring->ptr_mask; } return 0; } -static int radeon_debugfs_ib_bogus_info(struct seq_file *m, void *data) +static int radeon_ring_type_gfx_index = RADEON_RING_TYPE_GFX_INDEX; +static int cayman_ring_type_cp1_index = CAYMAN_RING_TYPE_CP1_INDEX; +static int cayman_ring_type_cp2_index = CAYMAN_RING_TYPE_CP2_INDEX; + +static struct drm_info_list radeon_debugfs_ring_info_list[] = { + {"radeon_ring_gfx", radeon_debugfs_ring_info, 0, &radeon_ring_type_gfx_index}, + {"radeon_ring_cp1", radeon_debugfs_ring_info, 0, &cayman_ring_type_cp1_index}, + {"radeon_ring_cp2", radeon_debugfs_ring_info, 0, &cayman_ring_type_cp2_index}, +}; + +static int radeon_debugfs_ib_info(struct seq_file *m, void *data) { struct drm_info_node *node = (struct drm_info_node *) m->private; - struct radeon_device *rdev = node->info_ent->data; - struct radeon_ib *ib; + struct radeon_ib *ib = node->info_ent->data; unsigned i; - mutex_lock(&rdev->ib_pool.mutex); - if (list_empty(&rdev->ib_pool.bogus_ib)) { - mutex_unlock(&rdev->ib_pool.mutex); - seq_printf(m, "no bogus IB recorded\n"); + if (ib == NULL) { return 0; } - ib = list_first_entry(&rdev->ib_pool.bogus_ib, struct radeon_ib, list); - list_del_init(&ib->list); - mutex_unlock(&rdev->ib_pool.mutex); + seq_printf(m, "IB %04u\n", ib->idx); + seq_printf(m, "IB fence %p\n", ib->fence); seq_printf(m, "IB size %05u dwords\n", ib->length_dw); for (i = 0; i < ib->length_dw; i++) { seq_printf(m, "[%05u]=0x%08X\n", i, ib->ptr[i]); } - vfree(ib->ptr); - kfree(ib); return 0; } static struct drm_info_list radeon_debugfs_ib_list[RADEON_IB_POOL_SIZE]; static char radeon_debugfs_ib_names[RADEON_IB_POOL_SIZE][32]; +#endif -static struct drm_info_list radeon_debugfs_ib_bogus_info_list[] = { - {"radeon_ib_bogus", radeon_debugfs_ib_bogus_info, 0, NULL}, -}; +int radeon_debugfs_ring_init(struct radeon_device *rdev) +{ +#if defined(CONFIG_DEBUG_FS) + return radeon_debugfs_add_files(rdev, radeon_debugfs_ring_info_list, + ARRAY_SIZE(radeon_debugfs_ring_info_list)); +#else + return 0; #endif +} int radeon_debugfs_ib_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) unsigned i; - int r; - radeon_debugfs_ib_bogus_info_list[0].data = rdev; - r = radeon_debugfs_add_files(rdev, radeon_debugfs_ib_bogus_info_list, 1); - if (r) - return r; for (i = 0; i < RADEON_IB_POOL_SIZE; i++) { sprintf(radeon_debugfs_ib_names[i], "radeon_ib_%04u", i); radeon_debugfs_ib_list[i].name = radeon_debugfs_ib_names[i]; diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c new file mode 100644 index 000000000000..4cce47e7dc0d --- /dev/null +++ b/drivers/gpu/drm/radeon/radeon_sa.c @@ -0,0 +1,189 @@ +/* + * Copyright 2011 Red Hat Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + */ +/* + * Authors: + * Jerome Glisse <glisse@freedesktop.org> + */ +#include "drmP.h" +#include "drm.h" +#include "radeon.h" + +int radeon_sa_bo_manager_init(struct radeon_device *rdev, + struct radeon_sa_manager *sa_manager, + unsigned size, u32 domain) +{ + int r; + + sa_manager->bo = NULL; + sa_manager->size = size; + sa_manager->domain = domain; + INIT_LIST_HEAD(&sa_manager->sa_bo); + + r = radeon_bo_create(rdev, size, RADEON_GPU_PAGE_SIZE, true, + RADEON_GEM_DOMAIN_CPU, &sa_manager->bo); + if (r) { + dev_err(rdev->dev, "(%d) failed to allocate bo for manager\n", r); + return r; + } + + return r; +} + +void radeon_sa_bo_manager_fini(struct radeon_device *rdev, + struct radeon_sa_manager *sa_manager) +{ + struct radeon_sa_bo *sa_bo, *tmp; + + if (!list_empty(&sa_manager->sa_bo)) { + dev_err(rdev->dev, "sa_manager is not empty, clearing anyway\n"); + } + list_for_each_entry_safe(sa_bo, tmp, &sa_manager->sa_bo, list) { + list_del_init(&sa_bo->list); + } + radeon_bo_unref(&sa_manager->bo); + sa_manager->size = 0; +} + +int radeon_sa_bo_manager_start(struct radeon_device *rdev, + struct radeon_sa_manager *sa_manager) +{ + int r; + + if (sa_manager->bo == NULL) { + dev_err(rdev->dev, "no bo for sa manager\n"); + return -EINVAL; + } + + /* map the buffer */ + r = radeon_bo_reserve(sa_manager->bo, false); + if (r) { + dev_err(rdev->dev, "(%d) failed to reserve manager bo\n", r); + return r; + } + r = radeon_bo_pin(sa_manager->bo, sa_manager->domain, &sa_manager->gpu_addr); + if (r) { + radeon_bo_unreserve(sa_manager->bo); + dev_err(rdev->dev, "(%d) failed to pin manager bo\n", r); + return r; + } + r = radeon_bo_kmap(sa_manager->bo, &sa_manager->cpu_ptr); + radeon_bo_unreserve(sa_manager->bo); + return r; +} + +int radeon_sa_bo_manager_suspend(struct radeon_device *rdev, + struct radeon_sa_manager *sa_manager) +{ + int r; + + if (sa_manager->bo == NULL) { + dev_err(rdev->dev, "no bo for sa manager\n"); + return -EINVAL; + } + + r = radeon_bo_reserve(sa_manager->bo, false); + if (!r) { + radeon_bo_kunmap(sa_manager->bo); + radeon_bo_unpin(sa_manager->bo); + radeon_bo_unreserve(sa_manager->bo); + } + return r; +} + +/* + * Principe is simple, we keep a list of sub allocation in offset + * order (first entry has offset == 0, last entry has the highest + * offset). + * + * When allocating new object we first check if there is room at + * the end total_size - (last_object_offset + last_object_size) >= + * alloc_size. If so we allocate new object there. + * + * When there is not enough room at the end, we start waiting for + * each sub object until we reach object_offset+object_size >= + * alloc_size, this object then become the sub object we return. + * + * Alignment can't be bigger than page size + */ +int radeon_sa_bo_new(struct radeon_device *rdev, + struct radeon_sa_manager *sa_manager, + struct radeon_sa_bo *sa_bo, + unsigned size, unsigned align) +{ + struct radeon_sa_bo *tmp; + struct list_head *head; + unsigned offset = 0, wasted = 0; + + BUG_ON(align > RADEON_GPU_PAGE_SIZE); + BUG_ON(size > sa_manager->size); + + /* no one ? */ + head = sa_manager->sa_bo.prev; + if (list_empty(&sa_manager->sa_bo)) { + goto out; + } + + /* look for a hole big enough */ + offset = 0; + list_for_each_entry(tmp, &sa_manager->sa_bo, list) { + /* room before this object ? */ + if ((tmp->offset - offset) >= size) { + head = tmp->list.prev; + goto out; + } + offset = tmp->offset + tmp->size; + wasted = offset % align; + if (wasted) { + wasted = align - wasted; + } + offset += wasted; + } + /* room at the end ? */ + head = sa_manager->sa_bo.prev; + tmp = list_entry(head, struct radeon_sa_bo, list); + offset = tmp->offset + tmp->size; + wasted = offset % align; + if (wasted) { + wasted = align - wasted; + } + offset += wasted; + if ((sa_manager->size - offset) < size) { + /* failed to find somethings big enough */ + return -ENOMEM; + } + +out: + sa_bo->manager = sa_manager; + sa_bo->offset = offset; + sa_bo->size = size; + list_add(&sa_bo->list, head); + return 0; +} + +void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo *sa_bo) +{ + list_del_init(&sa_bo->list); +} diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c new file mode 100644 index 000000000000..61dd4e3c9209 --- /dev/null +++ b/drivers/gpu/drm/radeon/radeon_semaphore.c @@ -0,0 +1,178 @@ +/* + * Copyright 2011 Christian König. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + */ +/* + * Authors: + * Christian König <deathsimple@vodafone.de> + */ +#include "drmP.h" +#include "drm.h" +#include "radeon.h" + +static int radeon_semaphore_add_bo(struct radeon_device *rdev) +{ + struct radeon_semaphore_bo *bo; + unsigned long irq_flags; + uint64_t gpu_addr; + uint32_t *cpu_ptr; + int r, i; + + + bo = kmalloc(sizeof(struct radeon_semaphore_bo), GFP_KERNEL); + if (bo == NULL) { + return -ENOMEM; + } + INIT_LIST_HEAD(&bo->free); + INIT_LIST_HEAD(&bo->list); + bo->nused = 0; + + r = radeon_ib_get(rdev, 0, &bo->ib, RADEON_SEMAPHORE_BO_SIZE); + if (r) { + dev_err(rdev->dev, "failed to get a bo after 5 retry\n"); + kfree(bo); + return r; + } + gpu_addr = rdev->ib_pool.sa_manager.gpu_addr; + gpu_addr += bo->ib->sa_bo.offset; + cpu_ptr = rdev->ib_pool.sa_manager.cpu_ptr; + cpu_ptr += (bo->ib->sa_bo.offset >> 2); + for (i = 0; i < (RADEON_SEMAPHORE_BO_SIZE/8); i++) { + bo->semaphores[i].gpu_addr = gpu_addr; + bo->semaphores[i].cpu_ptr = cpu_ptr; + bo->semaphores[i].bo = bo; + list_add_tail(&bo->semaphores[i].list, &bo->free); + gpu_addr += 8; + cpu_ptr += 2; + } + write_lock_irqsave(&rdev->semaphore_drv.lock, irq_flags); + list_add_tail(&bo->list, &rdev->semaphore_drv.bo); + write_unlock_irqrestore(&rdev->semaphore_drv.lock, irq_flags); + return 0; +} + +static void radeon_semaphore_del_bo_locked(struct radeon_device *rdev, + struct radeon_semaphore_bo *bo) +{ + radeon_sa_bo_free(rdev, &bo->ib->sa_bo); + radeon_fence_unref(&bo->ib->fence); + list_del(&bo->list); + kfree(bo); +} + +void radeon_semaphore_shrink_locked(struct radeon_device *rdev) +{ + struct radeon_semaphore_bo *bo, *n; + + if (list_empty(&rdev->semaphore_drv.bo)) { + return; + } + /* only shrink if first bo has free semaphore */ + bo = list_first_entry(&rdev->semaphore_drv.bo, struct radeon_semaphore_bo, list); + if (list_empty(&bo->free)) { + return; + } + list_for_each_entry_safe_continue(bo, n, &rdev->semaphore_drv.bo, list) { + if (bo->nused) + continue; + radeon_semaphore_del_bo_locked(rdev, bo); + } +} + +int radeon_semaphore_create(struct radeon_device *rdev, + struct radeon_semaphore **semaphore) +{ + struct radeon_semaphore_bo *bo; + unsigned long irq_flags; + bool do_retry = true; + int r; + +retry: + *semaphore = NULL; + write_lock_irqsave(&rdev->semaphore_drv.lock, irq_flags); + list_for_each_entry(bo, &rdev->semaphore_drv.bo, list) { + if (list_empty(&bo->free)) + continue; + *semaphore = list_first_entry(&bo->free, struct radeon_semaphore, list); + (*semaphore)->cpu_ptr[0] = 0; + (*semaphore)->cpu_ptr[1] = 0; + list_del(&(*semaphore)->list); + bo->nused++; + break; + } + write_unlock_irqrestore(&rdev->semaphore_drv.lock, irq_flags); + + if (*semaphore == NULL) { + if (do_retry) { + do_retry = false; + r = radeon_semaphore_add_bo(rdev); + if (r) + return r; + goto retry; + } + return -ENOMEM; + } + + return 0; +} + +void radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring, + struct radeon_semaphore *semaphore) +{ + radeon_semaphore_ring_emit(rdev, ring, &rdev->ring[ring], semaphore, false); +} + +void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring, + struct radeon_semaphore *semaphore) +{ + radeon_semaphore_ring_emit(rdev, ring, &rdev->ring[ring], semaphore, true); +} + +void radeon_semaphore_free(struct radeon_device *rdev, + struct radeon_semaphore *semaphore) +{ + unsigned long irq_flags; + + write_lock_irqsave(&rdev->semaphore_drv.lock, irq_flags); + semaphore->bo->nused--; + list_add_tail(&semaphore->list, &semaphore->bo->free); + radeon_semaphore_shrink_locked(rdev); + write_unlock_irqrestore(&rdev->semaphore_drv.lock, irq_flags); +} + +void radeon_semaphore_driver_fini(struct radeon_device *rdev) +{ + struct radeon_semaphore_bo *bo, *n; + unsigned long irq_flags; + + write_lock_irqsave(&rdev->semaphore_drv.lock, irq_flags); + /* we force to free everything */ + list_for_each_entry_safe(bo, n, &rdev->semaphore_drv.bo, list) { + if (!list_empty(&bo->free)) { + dev_err(rdev->dev, "still in use semaphore\n"); + } + radeon_semaphore_del_bo_locked(rdev, bo); + } + write_unlock_irqrestore(&rdev->semaphore_drv.lock, irq_flags); +} diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c index 602fa3541c45..dc5dcf483aa3 100644 --- a/drivers/gpu/drm/radeon/radeon_test.c +++ b/drivers/gpu/drm/radeon/radeon_test.c @@ -42,7 +42,9 @@ void radeon_test_moves(struct radeon_device *rdev) /* Number of tests = * (Total GTT - IB pool - writeback page - ring buffers) / test size */ - n = rdev->mc.gtt_size - RADEON_IB_POOL_SIZE*64*1024 - rdev->cp.ring_size; + n = rdev->mc.gtt_size - RADEON_IB_POOL_SIZE*64*1024; + for (i = 0; i < RADEON_NUM_RINGS; ++i) + n -= rdev->ring[i].ring_size; if (rdev->wb.wb_obj) n -= RADEON_GPU_PAGE_SIZE; if (rdev->ih.ring_obj) @@ -104,7 +106,7 @@ void radeon_test_moves(struct radeon_device *rdev) radeon_bo_kunmap(gtt_obj[i]); - r = radeon_fence_create(rdev, &fence); + r = radeon_fence_create(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX); if (r) { DRM_ERROR("Failed to create GTT->VRAM fence %d\n", i); goto out_cleanup; @@ -153,7 +155,7 @@ void radeon_test_moves(struct radeon_device *rdev) radeon_bo_kunmap(vram_obj); - r = radeon_fence_create(rdev, &fence); + r = radeon_fence_create(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX); if (r) { DRM_ERROR("Failed to create VRAM->GTT fence %d\n", i); goto out_cleanup; @@ -232,3 +234,264 @@ out_cleanup: printk(KERN_WARNING "Error while testing BO move.\n"); } } + +void radeon_test_ring_sync(struct radeon_device *rdev, + struct radeon_ring *ringA, + struct radeon_ring *ringB) +{ + struct radeon_fence *fence1 = NULL, *fence2 = NULL; + struct radeon_semaphore *semaphore = NULL; + int ridxA = radeon_ring_index(rdev, ringA); + int ridxB = radeon_ring_index(rdev, ringB); + int r; + + r = radeon_fence_create(rdev, &fence1, ridxA); + if (r) { + DRM_ERROR("Failed to create sync fence 1\n"); + goto out_cleanup; + } + r = radeon_fence_create(rdev, &fence2, ridxA); + if (r) { + DRM_ERROR("Failed to create sync fence 2\n"); + goto out_cleanup; + } + + r = radeon_semaphore_create(rdev, &semaphore); + if (r) { + DRM_ERROR("Failed to create semaphore\n"); + goto out_cleanup; + } + + r = radeon_ring_lock(rdev, ringA, 64); + if (r) { + DRM_ERROR("Failed to lock ring A %d\n", ridxA); + goto out_cleanup; + } + radeon_semaphore_emit_wait(rdev, ridxA, semaphore); + radeon_fence_emit(rdev, fence1); + radeon_semaphore_emit_wait(rdev, ridxA, semaphore); + radeon_fence_emit(rdev, fence2); + radeon_ring_unlock_commit(rdev, ringA); + + mdelay(1000); + + if (radeon_fence_signaled(fence1)) { + DRM_ERROR("Fence 1 signaled without waiting for semaphore.\n"); + goto out_cleanup; + } + + r = radeon_ring_lock(rdev, ringB, 64); + if (r) { + DRM_ERROR("Failed to lock ring B %p\n", ringB); + goto out_cleanup; + } + radeon_semaphore_emit_signal(rdev, ridxB, semaphore); + radeon_ring_unlock_commit(rdev, ringB); + + r = radeon_fence_wait(fence1, false); + if (r) { + DRM_ERROR("Failed to wait for sync fence 1\n"); + goto out_cleanup; + } + + mdelay(1000); + + if (radeon_fence_signaled(fence2)) { + DRM_ERROR("Fence 2 signaled without waiting for semaphore.\n"); + goto out_cleanup; + } + + r = radeon_ring_lock(rdev, ringB, 64); + if (r) { + DRM_ERROR("Failed to lock ring B %p\n", ringB); + goto out_cleanup; + } + radeon_semaphore_emit_signal(rdev, ridxB, semaphore); + radeon_ring_unlock_commit(rdev, ringB); + + r = radeon_fence_wait(fence2, false); + if (r) { + DRM_ERROR("Failed to wait for sync fence 1\n"); + goto out_cleanup; + } + +out_cleanup: + if (semaphore) + radeon_semaphore_free(rdev, semaphore); + + if (fence1) + radeon_fence_unref(&fence1); + + if (fence2) + radeon_fence_unref(&fence2); + + if (r) + printk(KERN_WARNING "Error while testing ring sync (%d).\n", r); +} + +void radeon_test_ring_sync2(struct radeon_device *rdev, + struct radeon_ring *ringA, + struct radeon_ring *ringB, + struct radeon_ring *ringC) +{ + struct radeon_fence *fenceA = NULL, *fenceB = NULL; + struct radeon_semaphore *semaphore = NULL; + int ridxA = radeon_ring_index(rdev, ringA); + int ridxB = radeon_ring_index(rdev, ringB); + int ridxC = radeon_ring_index(rdev, ringC); + bool sigA, sigB; + int i, r; + + r = radeon_fence_create(rdev, &fenceA, ridxA); + if (r) { + DRM_ERROR("Failed to create sync fence 1\n"); + goto out_cleanup; + } + r = radeon_fence_create(rdev, &fenceB, ridxB); + if (r) { + DRM_ERROR("Failed to create sync fence 2\n"); + goto out_cleanup; + } + + r = radeon_semaphore_create(rdev, &semaphore); + if (r) { + DRM_ERROR("Failed to create semaphore\n"); + goto out_cleanup; + } + + r = radeon_ring_lock(rdev, ringA, 64); + if (r) { + DRM_ERROR("Failed to lock ring A %d\n", ridxA); + goto out_cleanup; + } + radeon_semaphore_emit_wait(rdev, ridxA, semaphore); + radeon_fence_emit(rdev, fenceA); + radeon_ring_unlock_commit(rdev, ringA); + + r = radeon_ring_lock(rdev, ringB, 64); + if (r) { + DRM_ERROR("Failed to lock ring B %d\n", ridxB); + goto out_cleanup; + } + radeon_semaphore_emit_wait(rdev, ridxB, semaphore); + radeon_fence_emit(rdev, fenceB); + radeon_ring_unlock_commit(rdev, ringB); + + mdelay(1000); + + if (radeon_fence_signaled(fenceA)) { + DRM_ERROR("Fence A signaled without waiting for semaphore.\n"); + goto out_cleanup; + } + if (radeon_fence_signaled(fenceB)) { + DRM_ERROR("Fence A signaled without waiting for semaphore.\n"); + goto out_cleanup; + } + + r = radeon_ring_lock(rdev, ringC, 64); + if (r) { + DRM_ERROR("Failed to lock ring B %p\n", ringC); + goto out_cleanup; + } + radeon_semaphore_emit_signal(rdev, ridxC, semaphore); + radeon_ring_unlock_commit(rdev, ringC); + + for (i = 0; i < 30; ++i) { + mdelay(100); + sigA = radeon_fence_signaled(fenceA); + sigB = radeon_fence_signaled(fenceB); + if (sigA || sigB) + break; + } + + if (!sigA && !sigB) { + DRM_ERROR("Neither fence A nor B has been signaled\n"); + goto out_cleanup; + } else if (sigA && sigB) { + DRM_ERROR("Both fence A and B has been signaled\n"); + goto out_cleanup; + } + + DRM_INFO("Fence %c was first signaled\n", sigA ? 'A' : 'B'); + + r = radeon_ring_lock(rdev, ringC, 64); + if (r) { + DRM_ERROR("Failed to lock ring B %p\n", ringC); + goto out_cleanup; + } + radeon_semaphore_emit_signal(rdev, ridxC, semaphore); + radeon_ring_unlock_commit(rdev, ringC); + + mdelay(1000); + + r = radeon_fence_wait(fenceA, false); + if (r) { + DRM_ERROR("Failed to wait for sync fence A\n"); + goto out_cleanup; + } + r = radeon_fence_wait(fenceB, false); + if (r) { + DRM_ERROR("Failed to wait for sync fence B\n"); + goto out_cleanup; + } + +out_cleanup: + if (semaphore) + radeon_semaphore_free(rdev, semaphore); + + if (fenceA) + radeon_fence_unref(&fenceA); + + if (fenceB) + radeon_fence_unref(&fenceB); + + if (r) + printk(KERN_WARNING "Error while testing ring sync (%d).\n", r); +} + +void radeon_test_syncing(struct radeon_device *rdev) +{ + int i, j, k; + + for (i = 1; i < RADEON_NUM_RINGS; ++i) { + struct radeon_ring *ringA = &rdev->ring[i]; + if (!ringA->ready) + continue; + + for (j = 0; j < i; ++j) { + struct radeon_ring *ringB = &rdev->ring[j]; + if (!ringB->ready) + continue; + + DRM_INFO("Testing syncing between rings %d and %d...\n", i, j); + radeon_test_ring_sync(rdev, ringA, ringB); + + DRM_INFO("Testing syncing between rings %d and %d...\n", j, i); + radeon_test_ring_sync(rdev, ringB, ringA); + + for (k = 0; k < j; ++k) { + struct radeon_ring *ringC = &rdev->ring[k]; + if (!ringC->ready) + continue; + + DRM_INFO("Testing syncing between rings %d, %d and %d...\n", i, j, k); + radeon_test_ring_sync2(rdev, ringA, ringB, ringC); + + DRM_INFO("Testing syncing between rings %d, %d and %d...\n", i, k, j); + radeon_test_ring_sync2(rdev, ringA, ringC, ringB); + + DRM_INFO("Testing syncing between rings %d, %d and %d...\n", j, i, k); + radeon_test_ring_sync2(rdev, ringB, ringA, ringC); + + DRM_INFO("Testing syncing between rings %d, %d and %d...\n", j, k, i); + radeon_test_ring_sync2(rdev, ringB, ringC, ringA); + + DRM_INFO("Testing syncing between rings %d, %d and %d...\n", k, i, j); + radeon_test_ring_sync2(rdev, ringC, ringA, ringB); + + DRM_INFO("Testing syncing between rings %d, %d and %d...\n", k, j, i); + radeon_test_ring_sync2(rdev, ringC, ringB, ringA); + } + } + } +} diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 0b5468bfaf54..c421e77ace71 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -114,24 +114,6 @@ static void radeon_ttm_global_fini(struct radeon_device *rdev) } } -struct ttm_backend *radeon_ttm_backend_create(struct radeon_device *rdev); - -static struct ttm_backend* -radeon_create_ttm_backend_entry(struct ttm_bo_device *bdev) -{ - struct radeon_device *rdev; - - rdev = radeon_get_rdev(bdev); -#if __OS_HAS_AGP - if (rdev->flags & RADEON_IS_AGP) { - return ttm_agp_backend_init(bdev, rdev->ddev->agp->bridge); - } else -#endif - { - return radeon_ttm_backend_create(rdev); - } -} - static int radeon_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags) { return 0; @@ -206,7 +188,7 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo, rbo = container_of(bo, struct radeon_bo, tbo); switch (bo->mem.mem_type) { case TTM_PL_VRAM: - if (rbo->rdev->cp.ready == false) + if (rbo->rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready == false) radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU); else radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT); @@ -241,10 +223,10 @@ static int radeon_move_blit(struct ttm_buffer_object *bo, struct radeon_device *rdev; uint64_t old_start, new_start; struct radeon_fence *fence; - int r; + int r, i; rdev = radeon_get_rdev(bo->bdev); - r = radeon_fence_create(rdev, &fence); + r = radeon_fence_create(rdev, &fence, rdev->copy_ring); if (unlikely(r)) { return r; } @@ -273,13 +255,43 @@ static int radeon_move_blit(struct ttm_buffer_object *bo, DRM_ERROR("Unknown placement %d\n", old_mem->mem_type); return -EINVAL; } - if (!rdev->cp.ready) { - DRM_ERROR("Trying to move memory with CP turned off.\n"); + if (!rdev->ring[rdev->copy_ring].ready) { + DRM_ERROR("Trying to move memory with ring turned off.\n"); return -EINVAL; } BUILD_BUG_ON((PAGE_SIZE % RADEON_GPU_PAGE_SIZE) != 0); + /* sync other rings */ + if (rdev->family >= CHIP_R600) { + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + /* no need to sync to our own or unused rings */ + if (i == rdev->copy_ring || !rdev->ring[i].ready) + continue; + + if (!fence->semaphore) { + r = radeon_semaphore_create(rdev, &fence->semaphore); + /* FIXME: handle semaphore error */ + if (r) + continue; + } + + r = radeon_ring_lock(rdev, &rdev->ring[i], 3); + /* FIXME: handle ring lock error */ + if (r) + continue; + radeon_semaphore_emit_signal(rdev, i, fence->semaphore); + radeon_ring_unlock_commit(rdev, &rdev->ring[i]); + + r = radeon_ring_lock(rdev, &rdev->ring[rdev->copy_ring], 3); + /* FIXME: handle ring lock error */ + if (r) + continue; + radeon_semaphore_emit_wait(rdev, rdev->copy_ring, fence->semaphore); + radeon_ring_unlock_commit(rdev, &rdev->ring[rdev->copy_ring]); + } + } + r = radeon_copy(rdev, old_start, new_start, new_mem->num_pages * (PAGE_SIZE / RADEON_GPU_PAGE_SIZE), /* GPU pages */ fence); @@ -398,7 +410,7 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, radeon_move_null(bo, new_mem); return 0; } - if (!rdev->cp.ready || rdev->asic->copy == NULL) { + if (!rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready || rdev->asic->copy == NULL) { /* use memcpy */ goto memcpy; } @@ -515,8 +527,166 @@ static bool radeon_sync_obj_signaled(void *sync_obj, void *sync_arg) return radeon_fence_signaled((struct radeon_fence *)sync_obj); } +/* + * TTM backend functions. + */ +struct radeon_ttm_tt { + struct ttm_dma_tt ttm; + struct radeon_device *rdev; + u64 offset; +}; + +static int radeon_ttm_backend_bind(struct ttm_tt *ttm, + struct ttm_mem_reg *bo_mem) +{ + struct radeon_ttm_tt *gtt = (void*)ttm; + int r; + + gtt->offset = (unsigned long)(bo_mem->start << PAGE_SHIFT); + if (!ttm->num_pages) { + WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n", + ttm->num_pages, bo_mem, ttm); + } + r = radeon_gart_bind(gtt->rdev, gtt->offset, + ttm->num_pages, ttm->pages, gtt->ttm.dma_address); + if (r) { + DRM_ERROR("failed to bind %lu pages at 0x%08X\n", + ttm->num_pages, (unsigned)gtt->offset); + return r; + } + return 0; +} + +static int radeon_ttm_backend_unbind(struct ttm_tt *ttm) +{ + struct radeon_ttm_tt *gtt = (void *)ttm; + + radeon_gart_unbind(gtt->rdev, gtt->offset, ttm->num_pages); + return 0; +} + +static void radeon_ttm_backend_destroy(struct ttm_tt *ttm) +{ + struct radeon_ttm_tt *gtt = (void *)ttm; + + ttm_dma_tt_fini(>t->ttm); + kfree(gtt); +} + +static struct ttm_backend_func radeon_backend_func = { + .bind = &radeon_ttm_backend_bind, + .unbind = &radeon_ttm_backend_unbind, + .destroy = &radeon_ttm_backend_destroy, +}; + +struct ttm_tt *radeon_ttm_tt_create(struct ttm_bo_device *bdev, + unsigned long size, uint32_t page_flags, + struct page *dummy_read_page) +{ + struct radeon_device *rdev; + struct radeon_ttm_tt *gtt; + + rdev = radeon_get_rdev(bdev); +#if __OS_HAS_AGP + if (rdev->flags & RADEON_IS_AGP) { + return ttm_agp_tt_create(bdev, rdev->ddev->agp->bridge, + size, page_flags, dummy_read_page); + } +#endif + + gtt = kzalloc(sizeof(struct radeon_ttm_tt), GFP_KERNEL); + if (gtt == NULL) { + return NULL; + } + gtt->ttm.ttm.func = &radeon_backend_func; + gtt->rdev = rdev; + if (ttm_dma_tt_init(>t->ttm, bdev, size, page_flags, dummy_read_page)) { + kfree(gtt); + return NULL; + } + return >t->ttm.ttm; +} + +static int radeon_ttm_tt_populate(struct ttm_tt *ttm) +{ + struct radeon_device *rdev; + struct radeon_ttm_tt *gtt = (void *)ttm; + unsigned i; + int r; + + if (ttm->state != tt_unpopulated) + return 0; + + rdev = radeon_get_rdev(ttm->bdev); +#if __OS_HAS_AGP + if (rdev->flags & RADEON_IS_AGP) { + return ttm_agp_tt_populate(ttm); + } +#endif + +#ifdef CONFIG_SWIOTLB + if (swiotlb_nr_tbl()) { + return ttm_dma_populate(>t->ttm, rdev->dev); + } +#endif + + r = ttm_pool_populate(ttm); + if (r) { + return r; + } + + for (i = 0; i < ttm->num_pages; i++) { + gtt->ttm.dma_address[i] = pci_map_page(rdev->pdev, ttm->pages[i], + 0, PAGE_SIZE, + PCI_DMA_BIDIRECTIONAL); + if (pci_dma_mapping_error(rdev->pdev, gtt->ttm.dma_address[i])) { + while (--i) { + pci_unmap_page(rdev->pdev, gtt->ttm.dma_address[i], + PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + gtt->ttm.dma_address[i] = 0; + } + ttm_pool_unpopulate(ttm); + return -EFAULT; + } + } + return 0; +} + +static void radeon_ttm_tt_unpopulate(struct ttm_tt *ttm) +{ + struct radeon_device *rdev; + struct radeon_ttm_tt *gtt = (void *)ttm; + unsigned i; + + rdev = radeon_get_rdev(ttm->bdev); +#if __OS_HAS_AGP + if (rdev->flags & RADEON_IS_AGP) { + ttm_agp_tt_unpopulate(ttm); + return; + } +#endif + +#ifdef CONFIG_SWIOTLB + if (swiotlb_nr_tbl()) { + ttm_dma_unpopulate(>t->ttm, rdev->dev); + return; + } +#endif + + for (i = 0; i < ttm->num_pages; i++) { + if (gtt->ttm.dma_address[i]) { + pci_unmap_page(rdev->pdev, gtt->ttm.dma_address[i], + PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + } + } + + ttm_pool_unpopulate(ttm); +} + static struct ttm_bo_driver radeon_bo_driver = { - .create_ttm_backend_entry = &radeon_create_ttm_backend_entry, + .ttm_tt_create = &radeon_ttm_tt_create, + .ttm_tt_populate = &radeon_ttm_tt_populate, + .ttm_tt_unpopulate = &radeon_ttm_tt_unpopulate, .invalidate_caches = &radeon_invalidate_caches, .init_mem_type = &radeon_init_mem_type, .evict_flags = &radeon_evict_flags, @@ -680,124 +850,6 @@ int radeon_mmap(struct file *filp, struct vm_area_struct *vma) } -/* - * TTM backend functions. - */ -struct radeon_ttm_backend { - struct ttm_backend backend; - struct radeon_device *rdev; - unsigned long num_pages; - struct page **pages; - struct page *dummy_read_page; - dma_addr_t *dma_addrs; - bool populated; - bool bound; - unsigned offset; -}; - -static int radeon_ttm_backend_populate(struct ttm_backend *backend, - unsigned long num_pages, - struct page **pages, - struct page *dummy_read_page, - dma_addr_t *dma_addrs) -{ - struct radeon_ttm_backend *gtt; - - gtt = container_of(backend, struct radeon_ttm_backend, backend); - gtt->pages = pages; - gtt->dma_addrs = dma_addrs; - gtt->num_pages = num_pages; - gtt->dummy_read_page = dummy_read_page; - gtt->populated = true; - return 0; -} - -static void radeon_ttm_backend_clear(struct ttm_backend *backend) -{ - struct radeon_ttm_backend *gtt; - - gtt = container_of(backend, struct radeon_ttm_backend, backend); - gtt->pages = NULL; - gtt->dma_addrs = NULL; - gtt->num_pages = 0; - gtt->dummy_read_page = NULL; - gtt->populated = false; - gtt->bound = false; -} - - -static int radeon_ttm_backend_bind(struct ttm_backend *backend, - struct ttm_mem_reg *bo_mem) -{ - struct radeon_ttm_backend *gtt; - int r; - - gtt = container_of(backend, struct radeon_ttm_backend, backend); - gtt->offset = bo_mem->start << PAGE_SHIFT; - if (!gtt->num_pages) { - WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n", - gtt->num_pages, bo_mem, backend); - } - r = radeon_gart_bind(gtt->rdev, gtt->offset, - gtt->num_pages, gtt->pages, gtt->dma_addrs); - if (r) { - DRM_ERROR("failed to bind %lu pages at 0x%08X\n", - gtt->num_pages, gtt->offset); - return r; - } - gtt->bound = true; - return 0; -} - -static int radeon_ttm_backend_unbind(struct ttm_backend *backend) -{ - struct radeon_ttm_backend *gtt; - - gtt = container_of(backend, struct radeon_ttm_backend, backend); - radeon_gart_unbind(gtt->rdev, gtt->offset, gtt->num_pages); - gtt->bound = false; - return 0; -} - -static void radeon_ttm_backend_destroy(struct ttm_backend *backend) -{ - struct radeon_ttm_backend *gtt; - - gtt = container_of(backend, struct radeon_ttm_backend, backend); - if (gtt->bound) { - radeon_ttm_backend_unbind(backend); - } - kfree(gtt); -} - -static struct ttm_backend_func radeon_backend_func = { - .populate = &radeon_ttm_backend_populate, - .clear = &radeon_ttm_backend_clear, - .bind = &radeon_ttm_backend_bind, - .unbind = &radeon_ttm_backend_unbind, - .destroy = &radeon_ttm_backend_destroy, -}; - -struct ttm_backend *radeon_ttm_backend_create(struct radeon_device *rdev) -{ - struct radeon_ttm_backend *gtt; - - gtt = kzalloc(sizeof(struct radeon_ttm_backend), GFP_KERNEL); - if (gtt == NULL) { - return NULL; - } - gtt->backend.bdev = &rdev->mman.bdev; - gtt->backend.flags = 0; - gtt->backend.func = &radeon_backend_func; - gtt->rdev = rdev; - gtt->pages = NULL; - gtt->num_pages = 0; - gtt->dummy_read_page = NULL; - gtt->populated = false; - gtt->bound = false; - return >t->backend; -} - #define RADEON_DEBUGFS_MEM_TYPES 2 #if defined(CONFIG_DEBUG_FS) @@ -820,8 +872,8 @@ static int radeon_mm_dump_table(struct seq_file *m, void *data) static int radeon_ttm_debugfs_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - static struct drm_info_list radeon_mem_types_list[RADEON_DEBUGFS_MEM_TYPES+1]; - static char radeon_mem_types_names[RADEON_DEBUGFS_MEM_TYPES+1][32]; + static struct drm_info_list radeon_mem_types_list[RADEON_DEBUGFS_MEM_TYPES+2]; + static char radeon_mem_types_names[RADEON_DEBUGFS_MEM_TYPES+2][32]; unsigned i; for (i = 0; i < RADEON_DEBUGFS_MEM_TYPES; i++) { @@ -843,8 +895,17 @@ static int radeon_ttm_debugfs_init(struct radeon_device *rdev) radeon_mem_types_list[i].name = radeon_mem_types_names[i]; radeon_mem_types_list[i].show = &ttm_page_alloc_debugfs; radeon_mem_types_list[i].driver_features = 0; - radeon_mem_types_list[i].data = NULL; - return radeon_debugfs_add_files(rdev, radeon_mem_types_list, RADEON_DEBUGFS_MEM_TYPES+1); + radeon_mem_types_list[i++].data = NULL; +#ifdef CONFIG_SWIOTLB + if (swiotlb_nr_tbl()) { + sprintf(radeon_mem_types_names[i], "ttm_dma_page_pool"); + radeon_mem_types_list[i].name = radeon_mem_types_names[i]; + radeon_mem_types_list[i].show = &ttm_dma_page_alloc_debugfs; + radeon_mem_types_list[i].driver_features = 0; + radeon_mem_types_list[i++].data = NULL; + } +#endif + return radeon_debugfs_add_files(rdev, radeon_mem_types_list, i); #endif return 0; diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c index 06b90c87f8f3..b0ce84a20a68 100644 --- a/drivers/gpu/drm/radeon/rs400.c +++ b/drivers/gpu/drm/radeon/rs400.c @@ -410,6 +410,12 @@ static int rs400_startup(struct radeon_device *rdev) if (r) return r; + r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX); + if (r) { + dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); + return r; + } + /* Enable IRQ */ r100_irq_set(rdev); rdev->config.r300.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL); @@ -419,11 +425,18 @@ static int rs400_startup(struct radeon_device *rdev) dev_err(rdev->dev, "failed initializing CP (%d).\n", r); return r; } - r = r100_ib_init(rdev); + + r = radeon_ib_pool_start(rdev); + if (r) + return r; + + r = r100_ib_test(rdev); if (r) { - dev_err(rdev->dev, "failed initializing IB (%d).\n", r); + dev_err(rdev->dev, "failed testing IB (%d).\n", r); + rdev->accel_working = false; return r; } + return 0; } @@ -447,11 +460,14 @@ int rs400_resume(struct radeon_device *rdev) r300_clock_startup(rdev); /* Initialize surface registers */ radeon_surface_init(rdev); + + rdev->accel_working = true; return rs400_startup(rdev); } int rs400_suspend(struct radeon_device *rdev) { + radeon_ib_pool_suspend(rdev); r100_cp_disable(rdev); radeon_wb_disable(rdev); r100_irq_disable(rdev); @@ -530,7 +546,14 @@ int rs400_init(struct radeon_device *rdev) if (r) return r; r300_set_reg_safe(rdev); + + r = radeon_ib_pool_init(rdev); rdev->accel_working = true; + if (r) { + dev_err(rdev->dev, "IB initialization failed (%d).\n", r); + rdev->accel_working = false; + } + r = rs400_startup(rdev); if (r) { /* Somethings want wront with the accel init stop accel */ diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c index b1053d640423..803e0d3c1773 100644 --- a/drivers/gpu/drm/radeon/rs600.c +++ b/drivers/gpu/drm/radeon/rs600.c @@ -324,10 +324,10 @@ void rs600_hpd_fini(struct radeon_device *rdev) void rs600_bm_disable(struct radeon_device *rdev) { - u32 tmp; + u16 tmp; /* disable bus mastering */ - pci_read_config_word(rdev->pdev, 0x4, (u16*)&tmp); + pci_read_config_word(rdev->pdev, 0x4, &tmp); pci_write_config_word(rdev->pdev, 0x4, tmp & 0xFFFB); mdelay(1); } @@ -549,7 +549,7 @@ int rs600_irq_set(struct radeon_device *rdev) WREG32(R_000040_GEN_INT_CNTL, 0); return -EINVAL; } - if (rdev->irq.sw_int) { + if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) { tmp |= S_000040_SW_INT_EN(1); } if (rdev->irq.gui_idle) { @@ -642,7 +642,7 @@ int rs600_irq_process(struct radeon_device *rdev) while (status || rdev->irq.stat_regs.r500.disp_int) { /* SW interrupt */ if (G_000044_SW_INT(status)) { - radeon_fence_process(rdev); + radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX); } /* GUI idle */ if (G_000040_GUI_IDLE(status)) { @@ -849,6 +849,12 @@ static int rs600_startup(struct radeon_device *rdev) if (r) return r; + r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX); + if (r) { + dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); + return r; + } + /* Enable IRQ */ rs600_irq_set(rdev); rdev->config.r300.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL); @@ -858,15 +864,21 @@ static int rs600_startup(struct radeon_device *rdev) dev_err(rdev->dev, "failed initializing CP (%d).\n", r); return r; } - r = r100_ib_init(rdev); + + r = r600_audio_init(rdev); if (r) { - dev_err(rdev->dev, "failed initializing IB (%d).\n", r); + dev_err(rdev->dev, "failed initializing audio\n"); return r; } - r = r600_audio_init(rdev); + r = radeon_ib_pool_start(rdev); + if (r) + return r; + + r = r100_ib_test(rdev); if (r) { - dev_err(rdev->dev, "failed initializing audio\n"); + dev_err(rdev->dev, "failed testing IB (%d).\n", r); + rdev->accel_working = false; return r; } @@ -891,11 +903,14 @@ int rs600_resume(struct radeon_device *rdev) rv515_clock_startup(rdev); /* Initialize surface registers */ radeon_surface_init(rdev); + + rdev->accel_working = true; return rs600_startup(rdev); } int rs600_suspend(struct radeon_device *rdev) { + radeon_ib_pool_suspend(rdev); r600_audio_fini(rdev); r100_cp_disable(rdev); radeon_wb_disable(rdev); @@ -976,7 +991,14 @@ int rs600_init(struct radeon_device *rdev) if (r) return r; rs600_set_safe_registers(rdev); + + r = radeon_ib_pool_init(rdev); rdev->accel_working = true; + if (r) { + dev_err(rdev->dev, "IB initialization failed (%d).\n", r); + rdev->accel_working = false; + } + r = rs600_startup(rdev); if (r) { /* Somethings want wront with the accel init stop accel */ diff --git a/drivers/gpu/drm/radeon/rs690.c b/drivers/gpu/drm/radeon/rs690.c index a9049ed1a519..4f24a0fa8c82 100644 --- a/drivers/gpu/drm/radeon/rs690.c +++ b/drivers/gpu/drm/radeon/rs690.c @@ -621,6 +621,12 @@ static int rs690_startup(struct radeon_device *rdev) if (r) return r; + r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX); + if (r) { + dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); + return r; + } + /* Enable IRQ */ rs600_irq_set(rdev); rdev->config.r300.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL); @@ -630,15 +636,21 @@ static int rs690_startup(struct radeon_device *rdev) dev_err(rdev->dev, "failed initializing CP (%d).\n", r); return r; } - r = r100_ib_init(rdev); + + r = r600_audio_init(rdev); if (r) { - dev_err(rdev->dev, "failed initializing IB (%d).\n", r); + dev_err(rdev->dev, "failed initializing audio\n"); return r; } - r = r600_audio_init(rdev); + r = radeon_ib_pool_start(rdev); + if (r) + return r; + + r = r100_ib_test(rdev); if (r) { - dev_err(rdev->dev, "failed initializing audio\n"); + dev_err(rdev->dev, "failed testing IB (%d).\n", r); + rdev->accel_working = false; return r; } @@ -663,11 +675,14 @@ int rs690_resume(struct radeon_device *rdev) rv515_clock_startup(rdev); /* Initialize surface registers */ radeon_surface_init(rdev); + + rdev->accel_working = true; return rs690_startup(rdev); } int rs690_suspend(struct radeon_device *rdev) { + radeon_ib_pool_suspend(rdev); r600_audio_fini(rdev); r100_cp_disable(rdev); radeon_wb_disable(rdev); @@ -749,7 +764,14 @@ int rs690_init(struct radeon_device *rdev) if (r) return r; rs600_set_safe_registers(rdev); + + r = radeon_ib_pool_init(rdev); rdev->accel_working = true; + if (r) { + dev_err(rdev->dev, "IB initialization failed (%d).\n", r); + rdev->accel_working = false; + } + r = rs690_startup(rdev); if (r) { /* Somethings want wront with the accel init stop accel */ diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c index 6613ee9ecca3..880637fd1946 100644 --- a/drivers/gpu/drm/radeon/rv515.c +++ b/drivers/gpu/drm/radeon/rv515.c @@ -55,44 +55,45 @@ void rv515_debugfs(struct radeon_device *rdev) void rv515_ring_start(struct radeon_device *rdev) { + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; int r; - r = radeon_ring_lock(rdev, 64); + r = radeon_ring_lock(rdev, ring, 64); if (r) { return; } - radeon_ring_write(rdev, PACKET0(ISYNC_CNTL, 0)); - radeon_ring_write(rdev, + radeon_ring_write(ring, PACKET0(ISYNC_CNTL, 0)); + radeon_ring_write(ring, ISYNC_ANY2D_IDLE3D | ISYNC_ANY3D_IDLE2D | ISYNC_WAIT_IDLEGUI | ISYNC_CPSCRATCH_IDLEGUI); - radeon_ring_write(rdev, PACKET0(WAIT_UNTIL, 0)); - radeon_ring_write(rdev, WAIT_2D_IDLECLEAN | WAIT_3D_IDLECLEAN); - radeon_ring_write(rdev, PACKET0(R300_DST_PIPE_CONFIG, 0)); - radeon_ring_write(rdev, R300_PIPE_AUTO_CONFIG); - radeon_ring_write(rdev, PACKET0(GB_SELECT, 0)); - radeon_ring_write(rdev, 0); - radeon_ring_write(rdev, PACKET0(GB_ENABLE, 0)); - radeon_ring_write(rdev, 0); - radeon_ring_write(rdev, PACKET0(R500_SU_REG_DEST, 0)); - radeon_ring_write(rdev, (1 << rdev->num_gb_pipes) - 1); - radeon_ring_write(rdev, PACKET0(VAP_INDEX_OFFSET, 0)); - radeon_ring_write(rdev, 0); - radeon_ring_write(rdev, PACKET0(RB3D_DSTCACHE_CTLSTAT, 0)); - radeon_ring_write(rdev, RB3D_DC_FLUSH | RB3D_DC_FREE); - radeon_ring_write(rdev, PACKET0(ZB_ZCACHE_CTLSTAT, 0)); - radeon_ring_write(rdev, ZC_FLUSH | ZC_FREE); - radeon_ring_write(rdev, PACKET0(WAIT_UNTIL, 0)); - radeon_ring_write(rdev, WAIT_2D_IDLECLEAN | WAIT_3D_IDLECLEAN); - radeon_ring_write(rdev, PACKET0(GB_AA_CONFIG, 0)); - radeon_ring_write(rdev, 0); - radeon_ring_write(rdev, PACKET0(RB3D_DSTCACHE_CTLSTAT, 0)); - radeon_ring_write(rdev, RB3D_DC_FLUSH | RB3D_DC_FREE); - radeon_ring_write(rdev, PACKET0(ZB_ZCACHE_CTLSTAT, 0)); - radeon_ring_write(rdev, ZC_FLUSH | ZC_FREE); - radeon_ring_write(rdev, PACKET0(GB_MSPOS0, 0)); - radeon_ring_write(rdev, + radeon_ring_write(ring, PACKET0(WAIT_UNTIL, 0)); + radeon_ring_write(ring, WAIT_2D_IDLECLEAN | WAIT_3D_IDLECLEAN); + radeon_ring_write(ring, PACKET0(R300_DST_PIPE_CONFIG, 0)); + radeon_ring_write(ring, R300_PIPE_AUTO_CONFIG); + radeon_ring_write(ring, PACKET0(GB_SELECT, 0)); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, PACKET0(GB_ENABLE, 0)); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, PACKET0(R500_SU_REG_DEST, 0)); + radeon_ring_write(ring, (1 << rdev->num_gb_pipes) - 1); + radeon_ring_write(ring, PACKET0(VAP_INDEX_OFFSET, 0)); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, PACKET0(RB3D_DSTCACHE_CTLSTAT, 0)); + radeon_ring_write(ring, RB3D_DC_FLUSH | RB3D_DC_FREE); + radeon_ring_write(ring, PACKET0(ZB_ZCACHE_CTLSTAT, 0)); + radeon_ring_write(ring, ZC_FLUSH | ZC_FREE); + radeon_ring_write(ring, PACKET0(WAIT_UNTIL, 0)); + radeon_ring_write(ring, WAIT_2D_IDLECLEAN | WAIT_3D_IDLECLEAN); + radeon_ring_write(ring, PACKET0(GB_AA_CONFIG, 0)); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, PACKET0(RB3D_DSTCACHE_CTLSTAT, 0)); + radeon_ring_write(ring, RB3D_DC_FLUSH | RB3D_DC_FREE); + radeon_ring_write(ring, PACKET0(ZB_ZCACHE_CTLSTAT, 0)); + radeon_ring_write(ring, ZC_FLUSH | ZC_FREE); + radeon_ring_write(ring, PACKET0(GB_MSPOS0, 0)); + radeon_ring_write(ring, ((6 << MS_X0_SHIFT) | (6 << MS_Y0_SHIFT) | (6 << MS_X1_SHIFT) | @@ -101,8 +102,8 @@ void rv515_ring_start(struct radeon_device *rdev) (6 << MS_Y2_SHIFT) | (6 << MSBD0_Y_SHIFT) | (6 << MSBD0_X_SHIFT))); - radeon_ring_write(rdev, PACKET0(GB_MSPOS1, 0)); - radeon_ring_write(rdev, + radeon_ring_write(ring, PACKET0(GB_MSPOS1, 0)); + radeon_ring_write(ring, ((6 << MS_X3_SHIFT) | (6 << MS_Y3_SHIFT) | (6 << MS_X4_SHIFT) | @@ -110,15 +111,15 @@ void rv515_ring_start(struct radeon_device *rdev) (6 << MS_X5_SHIFT) | (6 << MS_Y5_SHIFT) | (6 << MSBD1_SHIFT))); - radeon_ring_write(rdev, PACKET0(GA_ENHANCE, 0)); - radeon_ring_write(rdev, GA_DEADLOCK_CNTL | GA_FASTSYNC_CNTL); - radeon_ring_write(rdev, PACKET0(GA_POLY_MODE, 0)); - radeon_ring_write(rdev, FRONT_PTYPE_TRIANGE | BACK_PTYPE_TRIANGE); - radeon_ring_write(rdev, PACKET0(GA_ROUND_MODE, 0)); - radeon_ring_write(rdev, GEOMETRY_ROUND_NEAREST | COLOR_ROUND_NEAREST); - radeon_ring_write(rdev, PACKET0(0x20C8, 0)); - radeon_ring_write(rdev, 0); - radeon_ring_unlock_commit(rdev); + radeon_ring_write(ring, PACKET0(GA_ENHANCE, 0)); + radeon_ring_write(ring, GA_DEADLOCK_CNTL | GA_FASTSYNC_CNTL); + radeon_ring_write(ring, PACKET0(GA_POLY_MODE, 0)); + radeon_ring_write(ring, FRONT_PTYPE_TRIANGE | BACK_PTYPE_TRIANGE); + radeon_ring_write(ring, PACKET0(GA_ROUND_MODE, 0)); + radeon_ring_write(ring, GEOMETRY_ROUND_NEAREST | COLOR_ROUND_NEAREST); + radeon_ring_write(ring, PACKET0(0x20C8, 0)); + radeon_ring_write(ring, 0); + radeon_ring_unlock_commit(rdev, ring); } int rv515_mc_wait_for_idle(struct radeon_device *rdev) @@ -392,6 +393,12 @@ static int rv515_startup(struct radeon_device *rdev) if (r) return r; + r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX); + if (r) { + dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); + return r; + } + /* Enable IRQ */ rs600_irq_set(rdev); rdev->config.r300.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL); @@ -401,9 +408,15 @@ static int rv515_startup(struct radeon_device *rdev) dev_err(rdev->dev, "failed initializing CP (%d).\n", r); return r; } - r = r100_ib_init(rdev); + + r = radeon_ib_pool_start(rdev); + if (r) + return r; + + r = r100_ib_test(rdev); if (r) { - dev_err(rdev->dev, "failed initializing IB (%d).\n", r); + dev_err(rdev->dev, "failed testing IB (%d).\n", r); + rdev->accel_working = false; return r; } return 0; @@ -428,6 +441,8 @@ int rv515_resume(struct radeon_device *rdev) rv515_clock_startup(rdev); /* Initialize surface registers */ radeon_surface_init(rdev); + + rdev->accel_working = true; return rv515_startup(rdev); } @@ -524,7 +539,14 @@ int rv515_init(struct radeon_device *rdev) if (r) return r; rv515_set_safe_registers(rdev); + + r = radeon_ib_pool_init(rdev); rdev->accel_working = true; + if (r) { + dev_err(rdev->dev, "IB initialization failed (%d).\n", r); + rdev->accel_working = false; + } + r = rv515_startup(rdev); if (r) { /* Somethings want wront with the accel init stop accel */ diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index 23ae1c60ab3d..a1668b659ddd 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -357,7 +357,7 @@ static int rv770_cp_load_microcode(struct radeon_device *rdev) void r700_cp_fini(struct radeon_device *rdev) { r700_cp_stop(rdev); - radeon_ring_fini(rdev); + radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]); } /* @@ -1043,6 +1043,7 @@ int rv770_mc_init(struct radeon_device *rdev) static int rv770_startup(struct radeon_device *rdev) { + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; int r; /* enable pcie gen2 link */ @@ -1082,6 +1083,12 @@ static int rv770_startup(struct radeon_device *rdev) if (r) return r; + r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX); + if (r) { + dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); + return r; + } + /* Enable IRQ */ r = r600_irq_init(rdev); if (r) { @@ -1091,7 +1098,9 @@ static int rv770_startup(struct radeon_device *rdev) } r600_irq_set(rdev); - r = radeon_ring_init(rdev, rdev->cp.ring_size); + r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, + R600_CP_RB_RPTR, R600_CP_RB_WPTR, + 0, 0xfffff, RADEON_CP_PACKET2); if (r) return r; r = rv770_cp_load_microcode(rdev); @@ -1101,6 +1110,17 @@ static int rv770_startup(struct radeon_device *rdev) if (r) return r; + r = radeon_ib_pool_start(rdev); + if (r) + return r; + + r = r600_ib_test(rdev, RADEON_RING_TYPE_GFX_INDEX); + if (r) { + dev_err(rdev->dev, "IB test failed (%d).\n", r); + rdev->accel_working = false; + return r; + } + return 0; } @@ -1115,18 +1135,13 @@ int rv770_resume(struct radeon_device *rdev) /* post card */ atom_asic_init(rdev->mode_info.atom_context); + rdev->accel_working = true; r = rv770_startup(rdev); if (r) { DRM_ERROR("r600 startup failed on resume\n"); return r; } - r = r600_ib_test(rdev); - if (r) { - DRM_ERROR("radeon: failed testing IB (%d).\n", r); - return r; - } - r = r600_audio_init(rdev); if (r) { dev_err(rdev->dev, "radeon: audio init failed\n"); @@ -1140,13 +1155,14 @@ int rv770_resume(struct radeon_device *rdev) int rv770_suspend(struct radeon_device *rdev) { r600_audio_fini(rdev); + radeon_ib_pool_suspend(rdev); + r600_blit_suspend(rdev); /* FIXME: we should wait for ring to be empty */ r700_cp_stop(rdev); - rdev->cp.ready = false; + rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; r600_irq_suspend(rdev); radeon_wb_disable(rdev); rv770_pcie_gart_disable(rdev); - r600_blit_suspend(rdev); return 0; } @@ -1215,8 +1231,8 @@ int rv770_init(struct radeon_device *rdev) if (r) return r; - rdev->cp.ring_obj = NULL; - r600_ring_init(rdev, 1024 * 1024); + rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL; + r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024); rdev->ih.ring_obj = NULL; r600_ih_ring_init(rdev, 64 * 1024); @@ -1225,30 +1241,24 @@ int rv770_init(struct radeon_device *rdev) if (r) return r; + r = radeon_ib_pool_init(rdev); rdev->accel_working = true; + if (r) { + dev_err(rdev->dev, "IB initialization failed (%d).\n", r); + rdev->accel_working = false; + } + r = rv770_startup(rdev); if (r) { dev_err(rdev->dev, "disabling GPU acceleration\n"); r700_cp_fini(rdev); r600_irq_fini(rdev); radeon_wb_fini(rdev); + r100_ib_fini(rdev); radeon_irq_kms_fini(rdev); rv770_pcie_gart_fini(rdev); rdev->accel_working = false; } - if (rdev->accel_working) { - r = radeon_ib_pool_init(rdev); - if (r) { - dev_err(rdev->dev, "IB initialization failed (%d).\n", r); - rdev->accel_working = false; - } else { - r = r600_ib_test(rdev); - if (r) { - dev_err(rdev->dev, "IB test failed (%d).\n", r); - rdev->accel_working = false; - } - } - } r = r600_audio_init(rdev); if (r) { @@ -1265,11 +1275,12 @@ void rv770_fini(struct radeon_device *rdev) r700_cp_fini(rdev); r600_irq_fini(rdev); radeon_wb_fini(rdev); - radeon_ib_pool_fini(rdev); + r100_ib_fini(rdev); radeon_irq_kms_fini(rdev); rv770_pcie_gart_fini(rdev); r600_vram_scratch_fini(rdev); radeon_gem_fini(rdev); + radeon_semaphore_driver_fini(rdev); radeon_fence_driver_fini(rdev); radeon_agp_fini(rdev); radeon_bo_fini(rdev); |