summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2010-02-23 18:13:34 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2010-02-23 18:13:34 -0800
commit34e3f91b4e66e52b3e189b2f778bd37d68963ca8 (patch)
tree83e7a0a2d1865ad413ac76a7a0883d4ed3d52dc8
parent9f3a6284880ceea452903e2043c88d7226736318 (diff)
parent4e4ddd47774313accc86b233d6ca2c6a9037a671 (diff)
Merge branch 'drm-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/airlied/drm-2.6
* 'drm-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/airlied/drm-2.6: drm/vmwgfx: Fix queries if no dma buffer thrashing is occuring. drm/nv50: fix vram ptes on IGPs to point at stolen system memory drm/nv50: fix instmem binding on IGPs to point at stolen system memory drm/nv50: improve vram page table construction drm/nv50: more efficient clearing of gpu page table entries drm/nv50: make nv50_mem_vm_{bind,unbind} operate only on vram drm/nouveau: Fix up pre-nv17 analog load detection.
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_drv.h1
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_mem.c113
-rw-r--r--drivers/gpu/drm/nouveau/nv04_dac.c6
-rw-r--r--drivers/gpu/drm/nouveau/nv50_instmem.c58
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c108
5 files changed, 210 insertions, 76 deletions
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 5445cefdd03e..1c15ef37b71c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -583,6 +583,7 @@ struct drm_nouveau_private {
uint64_t vm_end;
struct nouveau_gpuobj *vm_vram_pt[NV50_VM_VRAM_NR];
int vm_vram_pt_nr;
+ uint64_t vram_sys_base;
/* the mtrr covering the FB */
int fb_mtrr;
diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c
index 8f3a12f614ed..2dc09dbd817d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_mem.c
@@ -285,53 +285,50 @@ nv50_mem_vm_bind_linear(struct drm_device *dev, uint64_t virt, uint32_t size,
uint32_t flags, uint64_t phys)
{
struct drm_nouveau_private *dev_priv = dev->dev_private;
- struct nouveau_gpuobj **pgt;
- unsigned psz, pfl, pages;
-
- if (virt >= dev_priv->vm_gart_base &&
- (virt + size) < (dev_priv->vm_gart_base + dev_priv->vm_gart_size)) {
- psz = 12;
- pgt = &dev_priv->gart_info.sg_ctxdma;
- pfl = 0x21;
- virt -= dev_priv->vm_gart_base;
- } else
- if (virt >= dev_priv->vm_vram_base &&
- (virt + size) < (dev_priv->vm_vram_base + dev_priv->vm_vram_size)) {
- psz = 16;
- pgt = dev_priv->vm_vram_pt;
- pfl = 0x01;
- virt -= dev_priv->vm_vram_base;
- } else {
- NV_ERROR(dev, "Invalid address: 0x%16llx-0x%16llx\n",
- virt, virt + size - 1);
- return -EINVAL;
- }
+ struct nouveau_gpuobj *pgt;
+ unsigned block;
+ int i;
- pages = size >> psz;
+ virt = ((virt - dev_priv->vm_vram_base) >> 16) << 1;
+ size = (size >> 16) << 1;
+
+ phys |= ((uint64_t)flags << 32);
+ phys |= 1;
+ if (dev_priv->vram_sys_base) {
+ phys += dev_priv->vram_sys_base;
+ phys |= 0x30;
+ }
dev_priv->engine.instmem.prepare_access(dev, true);
- if (flags & 0x80000000) {
- while (pages--) {
- struct nouveau_gpuobj *pt = pgt[virt >> 29];
- unsigned pte = ((virt & 0x1fffffffULL) >> psz) << 1;
+ while (size) {
+ unsigned offset_h = upper_32_bits(phys);
+ unsigned offset_l = lower_32_bits(phys);
+ unsigned pte, end;
+
+ for (i = 7; i >= 0; i--) {
+ block = 1 << (i + 1);
+ if (size >= block && !(virt & (block - 1)))
+ break;
+ }
+ offset_l |= (i << 7);
- nv_wo32(dev, pt, pte++, 0x00000000);
- nv_wo32(dev, pt, pte++, 0x00000000);
+ phys += block << 15;
+ size -= block;
- virt += (1 << psz);
- }
- } else {
- while (pages--) {
- struct nouveau_gpuobj *pt = pgt[virt >> 29];
- unsigned pte = ((virt & 0x1fffffffULL) >> psz) << 1;
- unsigned offset_h = upper_32_bits(phys) & 0xff;
- unsigned offset_l = lower_32_bits(phys);
+ while (block) {
+ pgt = dev_priv->vm_vram_pt[virt >> 14];
+ pte = virt & 0x3ffe;
- nv_wo32(dev, pt, pte++, offset_l | pfl);
- nv_wo32(dev, pt, pte++, offset_h | flags);
+ end = pte + block;
+ if (end > 16384)
+ end = 16384;
+ block -= (end - pte);
+ virt += (end - pte);
- phys += (1 << psz);
- virt += (1 << psz);
+ while (pte < end) {
+ nv_wo32(dev, pgt, pte++, offset_l);
+ nv_wo32(dev, pgt, pte++, offset_h);
+ }
}
}
dev_priv->engine.instmem.finish_access(dev);
@@ -356,7 +353,41 @@ nv50_mem_vm_bind_linear(struct drm_device *dev, uint64_t virt, uint32_t size,
void
nv50_mem_vm_unbind(struct drm_device *dev, uint64_t virt, uint32_t size)
{
- nv50_mem_vm_bind_linear(dev, virt, size, 0x80000000, 0);
+ struct drm_nouveau_private *dev_priv = dev->dev_private;
+ struct nouveau_gpuobj *pgt;
+ unsigned pages, pte, end;
+
+ virt -= dev_priv->vm_vram_base;
+ pages = (size >> 16) << 1;
+
+ dev_priv->engine.instmem.prepare_access(dev, true);
+ while (pages) {
+ pgt = dev_priv->vm_vram_pt[virt >> 29];
+ pte = (virt & 0x1ffe0000ULL) >> 15;
+
+ end = pte + pages;
+ if (end > 16384)
+ end = 16384;
+ pages -= (end - pte);
+ virt += (end - pte) << 15;
+
+ while (pte < end)
+ nv_wo32(dev, pgt, pte++, 0);
+ }
+ dev_priv->engine.instmem.finish_access(dev);
+
+ nv_wr32(dev, 0x100c80, 0x00050001);
+ if (!nv_wait(0x100c80, 0x00000001, 0x00000000)) {
+ NV_ERROR(dev, "timeout: (0x100c80 & 1) == 0 (2)\n");
+ NV_ERROR(dev, "0x100c80 = 0x%08x\n", nv_rd32(dev, 0x100c80));
+ return;
+ }
+
+ nv_wr32(dev, 0x100c80, 0x00000001);
+ if (!nv_wait(0x100c80, 0x00000001, 0x00000000)) {
+ NV_ERROR(dev, "timeout: (0x100c80 & 1) == 0 (2)\n");
+ NV_ERROR(dev, "0x100c80 = 0x%08x\n", nv_rd32(dev, 0x100c80));
+ }
}
/*
diff --git a/drivers/gpu/drm/nouveau/nv04_dac.c b/drivers/gpu/drm/nouveau/nv04_dac.c
index d0e038d28948..1d73b15d70da 100644
--- a/drivers/gpu/drm/nouveau/nv04_dac.c
+++ b/drivers/gpu/drm/nouveau/nv04_dac.c
@@ -119,7 +119,7 @@ static enum drm_connector_status nv04_dac_detect(struct drm_encoder *encoder,
struct drm_connector *connector)
{
struct drm_device *dev = encoder->dev;
- uint8_t saved_seq1, saved_pi, saved_rpc1;
+ uint8_t saved_seq1, saved_pi, saved_rpc1, saved_cr_mode;
uint8_t saved_palette0[3], saved_palette_mask;
uint32_t saved_rtest_ctrl, saved_rgen_ctrl;
int i;
@@ -135,6 +135,9 @@ static enum drm_connector_status nv04_dac_detect(struct drm_encoder *encoder,
/* only implemented for head A for now */
NVSetOwner(dev, 0);
+ saved_cr_mode = NVReadVgaCrtc(dev, 0, NV_CIO_CR_MODE_INDEX);
+ NVWriteVgaCrtc(dev, 0, NV_CIO_CR_MODE_INDEX, saved_cr_mode | 0x80);
+
saved_seq1 = NVReadVgaSeq(dev, 0, NV_VIO_SR_CLOCK_INDEX);
NVWriteVgaSeq(dev, 0, NV_VIO_SR_CLOCK_INDEX, saved_seq1 & ~0x20);
@@ -203,6 +206,7 @@ out:
NVWriteVgaCrtc(dev, 0, NV_CIO_CRE_PIXEL_INDEX, saved_pi);
NVWriteVgaCrtc(dev, 0, NV_CIO_CRE_RPC1_INDEX, saved_rpc1);
NVWriteVgaSeq(dev, 0, NV_VIO_SR_CLOCK_INDEX, saved_seq1);
+ NVWriteVgaCrtc(dev, 0, NV_CIO_CR_MODE_INDEX, saved_cr_mode);
if (blue == 0x18) {
NV_INFO(dev, "Load detected on head A\n");
diff --git a/drivers/gpu/drm/nouveau/nv50_instmem.c b/drivers/gpu/drm/nouveau/nv50_instmem.c
index 94400f777e7f..f0dc4e36ef05 100644
--- a/drivers/gpu/drm/nouveau/nv50_instmem.c
+++ b/drivers/gpu/drm/nouveau/nv50_instmem.c
@@ -76,6 +76,11 @@ nv50_instmem_init(struct drm_device *dev)
for (i = 0x1700; i <= 0x1710; i += 4)
priv->save1700[(i-0x1700)/4] = nv_rd32(dev, i);
+ if (dev_priv->chipset == 0xaa || dev_priv->chipset == 0xac)
+ dev_priv->vram_sys_base = nv_rd32(dev, 0x100e10) << 12;
+ else
+ dev_priv->vram_sys_base = 0;
+
/* Reserve the last MiB of VRAM, we should probably try to avoid
* setting up the below tables over the top of the VBIOS image at
* some point.
@@ -172,16 +177,28 @@ nv50_instmem_init(struct drm_device *dev)
* We map the entire fake channel into the start of the PRAMIN BAR
*/
ret = nouveau_gpuobj_new_ref(dev, chan, NULL, 0, pt_size, 0x1000,
- 0, &priv->pramin_pt);
+ 0, &priv->pramin_pt);
if (ret)
return ret;
- for (i = 0, v = c_offset; i < pt_size; i += 8, v += 0x1000) {
- if (v < (c_offset + c_size))
- BAR0_WI32(priv->pramin_pt->gpuobj, i + 0, v | 1);
- else
- BAR0_WI32(priv->pramin_pt->gpuobj, i + 0, 0x00000009);
+ v = c_offset | 1;
+ if (dev_priv->vram_sys_base) {
+ v += dev_priv->vram_sys_base;
+ v |= 0x30;
+ }
+
+ i = 0;
+ while (v < dev_priv->vram_sys_base + c_offset + c_size) {
+ BAR0_WI32(priv->pramin_pt->gpuobj, i + 0, v);
+ BAR0_WI32(priv->pramin_pt->gpuobj, i + 4, 0x00000000);
+ v += 0x1000;
+ i += 8;
+ }
+
+ while (i < pt_size) {
+ BAR0_WI32(priv->pramin_pt->gpuobj, i + 0, 0x00000000);
BAR0_WI32(priv->pramin_pt->gpuobj, i + 4, 0x00000000);
+ i += 8;
}
BAR0_WI32(chan->vm_pd, 0x00, priv->pramin_pt->instance | 0x63);
@@ -416,7 +433,9 @@ nv50_instmem_bind(struct drm_device *dev, struct nouveau_gpuobj *gpuobj)
{
struct drm_nouveau_private *dev_priv = dev->dev_private;
struct nv50_instmem_priv *priv = dev_priv->engine.instmem.priv;
- uint32_t pte, pte_end, vram;
+ struct nouveau_gpuobj *pramin_pt = priv->pramin_pt->gpuobj;
+ uint32_t pte, pte_end;
+ uint64_t vram;
if (!gpuobj->im_backing || !gpuobj->im_pramin || gpuobj->im_bound)
return -EINVAL;
@@ -424,20 +443,24 @@ nv50_instmem_bind(struct drm_device *dev, struct nouveau_gpuobj *gpuobj)
NV_DEBUG(dev, "st=0x%0llx sz=0x%0llx\n",
gpuobj->im_pramin->start, gpuobj->im_pramin->size);
- pte = (gpuobj->im_pramin->start >> 12) << 3;
- pte_end = ((gpuobj->im_pramin->size >> 12) << 3) + pte;
+ pte = (gpuobj->im_pramin->start >> 12) << 1;
+ pte_end = ((gpuobj->im_pramin->size >> 12) << 1) + pte;
vram = gpuobj->im_backing_start;
NV_DEBUG(dev, "pramin=0x%llx, pte=%d, pte_end=%d\n",
gpuobj->im_pramin->start, pte, pte_end);
NV_DEBUG(dev, "first vram page: 0x%08x\n", gpuobj->im_backing_start);
+ vram |= 1;
+ if (dev_priv->vram_sys_base) {
+ vram += dev_priv->vram_sys_base;
+ vram |= 0x30;
+ }
+
dev_priv->engine.instmem.prepare_access(dev, true);
while (pte < pte_end) {
- nv_wo32(dev, priv->pramin_pt->gpuobj, (pte + 0)/4, vram | 1);
- nv_wo32(dev, priv->pramin_pt->gpuobj, (pte + 4)/4, 0x00000000);
-
- pte += 8;
+ nv_wo32(dev, pramin_pt, pte++, lower_32_bits(vram));
+ nv_wo32(dev, pramin_pt, pte++, upper_32_bits(vram));
vram += NV50_INSTMEM_PAGE_SIZE;
}
dev_priv->engine.instmem.finish_access(dev);
@@ -470,14 +493,13 @@ nv50_instmem_unbind(struct drm_device *dev, struct nouveau_gpuobj *gpuobj)
if (gpuobj->im_bound == 0)
return -EINVAL;
- pte = (gpuobj->im_pramin->start >> 12) << 3;
- pte_end = ((gpuobj->im_pramin->size >> 12) << 3) + pte;
+ pte = (gpuobj->im_pramin->start >> 12) << 1;
+ pte_end = ((gpuobj->im_pramin->size >> 12) << 1) + pte;
dev_priv->engine.instmem.prepare_access(dev, true);
while (pte < pte_end) {
- nv_wo32(dev, priv->pramin_pt->gpuobj, (pte + 0)/4, 0x00000009);
- nv_wo32(dev, priv->pramin_pt->gpuobj, (pte + 4)/4, 0x00000000);
- pte += 8;
+ nv_wo32(dev, priv->pramin_pt->gpuobj, pte++, 0x00000000);
+ nv_wo32(dev, priv->pramin_pt->gpuobj, pte++, 0x00000000);
}
dev_priv->engine.instmem.finish_access(dev);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index d69caf92ffe7..0897359b3e4e 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -182,25 +182,19 @@ static int vmw_cmd_present_check(struct vmw_private *dev_priv,
return vmw_cmd_sid_check(dev_priv, sw_context, &cmd->body.sid);
}
-static int vmw_cmd_dma(struct vmw_private *dev_priv,
- struct vmw_sw_context *sw_context,
- SVGA3dCmdHeader *header)
+static int vmw_translate_guest_ptr(struct vmw_private *dev_priv,
+ struct vmw_sw_context *sw_context,
+ SVGAGuestPtr *ptr,
+ struct vmw_dma_buffer **vmw_bo_p)
{
- uint32_t handle;
struct vmw_dma_buffer *vmw_bo = NULL;
struct ttm_buffer_object *bo;
- struct vmw_surface *srf = NULL;
- struct vmw_dma_cmd {
- SVGA3dCmdHeader header;
- SVGA3dCmdSurfaceDMA dma;
- } *cmd;
+ uint32_t handle = ptr->gmrId;
struct vmw_relocation *reloc;
- int ret;
uint32_t cur_validate_node;
struct ttm_validate_buffer *val_buf;
+ int ret;
- cmd = container_of(header, struct vmw_dma_cmd, header);
- handle = cmd->dma.guest.ptr.gmrId;
ret = vmw_user_dmabuf_lookup(sw_context->tfile, handle, &vmw_bo);
if (unlikely(ret != 0)) {
DRM_ERROR("Could not find or use GMR region.\n");
@@ -209,14 +203,14 @@ static int vmw_cmd_dma(struct vmw_private *dev_priv,
bo = &vmw_bo->base;
if (unlikely(sw_context->cur_reloc >= VMWGFX_MAX_RELOCATIONS)) {
- DRM_ERROR("Max number of DMA commands per submission"
+ DRM_ERROR("Max number relocations per submission"
" exceeded\n");
ret = -EINVAL;
goto out_no_reloc;
}
reloc = &sw_context->relocs[sw_context->cur_reloc++];
- reloc->location = &cmd->dma.guest.ptr;
+ reloc->location = ptr;
cur_validate_node = vmw_dmabuf_validate_node(bo, sw_context->cur_val_buf);
if (unlikely(cur_validate_node >= VMWGFX_MAX_GMRS)) {
@@ -234,7 +228,89 @@ static int vmw_cmd_dma(struct vmw_private *dev_priv,
list_add_tail(&val_buf->head, &sw_context->validate_nodes);
++sw_context->cur_val_buf;
}
+ *vmw_bo_p = vmw_bo;
+ return 0;
+
+out_no_reloc:
+ vmw_dmabuf_unreference(&vmw_bo);
+ vmw_bo_p = NULL;
+ return ret;
+}
+
+static int vmw_cmd_end_query(struct vmw_private *dev_priv,
+ struct vmw_sw_context *sw_context,
+ SVGA3dCmdHeader *header)
+{
+ struct vmw_dma_buffer *vmw_bo;
+ struct vmw_query_cmd {
+ SVGA3dCmdHeader header;
+ SVGA3dCmdEndQuery q;
+ } *cmd;
+ int ret;
+
+ cmd = container_of(header, struct vmw_query_cmd, header);
+ ret = vmw_cmd_cid_check(dev_priv, sw_context, header);
+ if (unlikely(ret != 0))
+ return ret;
+
+ ret = vmw_translate_guest_ptr(dev_priv, sw_context,
+ &cmd->q.guestResult,
+ &vmw_bo);
+ if (unlikely(ret != 0))
+ return ret;
+
+ vmw_dmabuf_unreference(&vmw_bo);
+ return 0;
+}
+static int vmw_cmd_wait_query(struct vmw_private *dev_priv,
+ struct vmw_sw_context *sw_context,
+ SVGA3dCmdHeader *header)
+{
+ struct vmw_dma_buffer *vmw_bo;
+ struct vmw_query_cmd {
+ SVGA3dCmdHeader header;
+ SVGA3dCmdWaitForQuery q;
+ } *cmd;
+ int ret;
+
+ cmd = container_of(header, struct vmw_query_cmd, header);
+ ret = vmw_cmd_cid_check(dev_priv, sw_context, header);
+ if (unlikely(ret != 0))
+ return ret;
+
+ ret = vmw_translate_guest_ptr(dev_priv, sw_context,
+ &cmd->q.guestResult,
+ &vmw_bo);
+ if (unlikely(ret != 0))
+ return ret;
+
+ vmw_dmabuf_unreference(&vmw_bo);
+ return 0;
+}
+
+
+static int vmw_cmd_dma(struct vmw_private *dev_priv,
+ struct vmw_sw_context *sw_context,
+ SVGA3dCmdHeader *header)
+{
+ struct vmw_dma_buffer *vmw_bo = NULL;
+ struct ttm_buffer_object *bo;
+ struct vmw_surface *srf = NULL;
+ struct vmw_dma_cmd {
+ SVGA3dCmdHeader header;
+ SVGA3dCmdSurfaceDMA dma;
+ } *cmd;
+ int ret;
+
+ cmd = container_of(header, struct vmw_dma_cmd, header);
+ ret = vmw_translate_guest_ptr(dev_priv, sw_context,
+ &cmd->dma.guest.ptr,
+ &vmw_bo);
+ if (unlikely(ret != 0))
+ return ret;
+
+ bo = &vmw_bo->base;
ret = vmw_user_surface_lookup_handle(dev_priv, sw_context->tfile,
cmd->dma.host.sid, &srf);
if (ret) {
@@ -379,8 +455,8 @@ static vmw_cmd_func vmw_cmd_funcs[SVGA_3D_CMD_MAX] = {
VMW_CMD_DEF(SVGA_3D_CMD_DRAW_PRIMITIVES, &vmw_cmd_draw),
VMW_CMD_DEF(SVGA_3D_CMD_SETSCISSORRECT, &vmw_cmd_cid_check),
VMW_CMD_DEF(SVGA_3D_CMD_BEGIN_QUERY, &vmw_cmd_cid_check),
- VMW_CMD_DEF(SVGA_3D_CMD_END_QUERY, &vmw_cmd_cid_check),
- VMW_CMD_DEF(SVGA_3D_CMD_WAIT_FOR_QUERY, &vmw_cmd_cid_check),
+ VMW_CMD_DEF(SVGA_3D_CMD_END_QUERY, &vmw_cmd_end_query),
+ VMW_CMD_DEF(SVGA_3D_CMD_WAIT_FOR_QUERY, &vmw_cmd_wait_query),
VMW_CMD_DEF(SVGA_3D_CMD_PRESENT_READBACK, &vmw_cmd_ok),
VMW_CMD_DEF(SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN,
&vmw_cmd_blt_surf_screen_check)