/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Ben Widawsky
 *
 */

/*
 * System routine overview (everything below is visible in this file):
 *   1. Enter: OR CR0_0_ENTRY_UNMASK into cr0.0.
 *   2. Save the MRFs, g0, g1, a state word, a protocol version word and the
 *      control/state registers into per-thread scratch space.
 *   3. EVICT_CACHE, wait on notification register n1, EVICT_CACHE again.
 *   4. Reload everything that was saved and adjust cr0.0 / cr0.1 before the
 *      routine falls off its end.
 *
 * All comments use the block form so that a comment can never swallow a
 * following directive, whatever the line structure of the file.
 */

#include "debug.h"
#include "evict.h"

/* Bits of cr0.0 referenced by this routine. */
#define CR0_0_ME_STATE_CTRL (1 << 31)
#define CR0_0_BP_SUPPRESS (1 << 15)
#define CR0_0_SPF_EN (1 << 2)
#define CR0_0_ACC_DIS (1 << 1)

/* Bits of cr0.1 referenced by this routine. */
#define CR0_1_BES_CTRL (1 << 31)
#define CR0_1_HALT_CTRL (1 << 30)
#define CR0_1_SOFT_EXCEPTION_CTRL (1 << 29)
#define CR0_1_ILLGL_OP_STS (1 << 28)
#define CR0_1_STACK_OVRFLW_STS (1 << 27)

/* OR-ed into cr0.0 as the very first operation of the routine. */
#define CR0_0_ENTRY_UNMASK (CR0_0_SPF_EN | CR0_0_ACC_DIS)
/* TODO: Need to fix this for non breakpoint case */
/* AND-ed into cr0.1 on the way out (clears CR0_1_BES_CTRL only). */
#define CR0_1_ENTRY_UNMASK ~(CR0_1_BES_CTRL)
/* AND-ed into cr0.0 as the last operation before the final nop. */
#define CR0_0_RETURN_MASK ~(CR0_0_ME_STATE_CTRL | CR0_0_SPF_EN | CR0_0_ACC_DIS)

/* TODO: not sure how to make this not hardcoded */
#define PER_THREAD_SCRATCH_SIZE (1 << 20)
/*
 * NOTE(review): a shift by 4 divides by 16, i.e. this counts 16-byte units
 * even though the name says "QWORDS" -- confirm the intended unit.
 */
#define PER_THREAD_QWORDS (PER_THREAD_SCRATCH_SIZE >> 4)

/* Should get this from brw_defines.h */
#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2
#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS 3
#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS 4
#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 8
#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0

/* desc field, ie. dword3 6.3.66.2 and 2.11.2.1.4 */
#define SEND_MLEN_5 (5<<25)
#define SEND_MLEN_3 (3<<25)
#define SEND_MLEN_2 (2<<25)
#define SEND_MLEN_1 (1<<25)
#define SEND_RLEN_1 (1<<20)
#define SEND_RLEN_0 (0<<20)
#define SEND_HEADER_PRESENT (1<<19)
#define SEND_WRITE_COMMIT (1<<17)
#define SEND_TYPE_WRITE (GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE<<13)
#define SEND_TYPE_READ (BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ<<13)
#define SEND_BLOCK_SIZE1 (BRW_DATAPORT_OWORD_BLOCK_2_OWORDS<<8)
#define SEND_BLOCK_SIZE2 (BRW_DATAPORT_OWORD_BLOCK_4_OWORDS<<8)
#define SEND_BLOCK_SIZE4 (BRW_DATAPORT_OWORD_BLOCK_8_OWORDS<<8)
#define SEND_BINDING_TABLE (255<<0)

/*
 * Complete message descriptors, assembled from the fields above.
 * WRITE_DESC1_XXX: no write commit.
 * WRITE_DESC1_WC:  same as WRITE_DESC1_XXX plus SEND_WRITE_COMMIT.
 */
#define WRITE_DESC1_XXX SEND_BINDING_TABLE | SEND_BLOCK_SIZE1 | SEND_TYPE_WRITE | SEND_HEADER_PRESENT | SEND_MLEN_2
#define WRITE_DESC1_WC SEND_BINDING_TABLE | SEND_BLOCK_SIZE1 | SEND_TYPE_WRITE | SEND_HEADER_PRESENT | SEND_MLEN_2 | SEND_WRITE_COMMIT
#define WRITE_DESC2 SEND_BINDING_TABLE | SEND_BLOCK_SIZE2 | SEND_TYPE_WRITE | SEND_HEADER_PRESENT | SEND_MLEN_3
#define WRITE_DESC4 SEND_BINDING_TABLE | SEND_BLOCK_SIZE4 | SEND_TYPE_WRITE | SEND_HEADER_PRESENT | SEND_MLEN_5
#define RECV_DESC1 SEND_BINDING_TABLE | SEND_BLOCK_SIZE1 | SEND_TYPE_READ | SEND_HEADER_PRESENT | SEND_MLEN_1 | SEND_RLEN_1
/* #define SEND_DESC1 0x40902FF */
#define SEND_DESC1_WC 0x40b02FF

/* ex_desc field 6.3.66.2 */
#define SEND_DP_RENDER_CACHE (5<<0)
#define SEND_EOT (1<<5)

#define SEND_EX_DESC SEND_DP_RENDER_CACHE

/**
 * WRITE_SCRATCH1 - Write 2 owords.
 * cdst.2 - offset
 * cdst.5 - per thread scratch base, relative to gsba??
 * cdst+1 - data to be written.
 *
 * WRITE_SCRATCH1_WC is the same send using the write-commit descriptor and
 * g1 (instead of null) as the send destination, so it overwrites g1.
 * WRITE_SCRATCH2 / WRITE_SCRATCH4 use the 4-oword / 8-oword descriptors.
 */
#define WRITE_SCRATCH1(cdst) \
	send (16) null cdst SEND_EX_DESC WRITE_DESC1_XXX FLAGS
#define WRITE_SCRATCH1_WC(cdst) \
	send (16) g1 cdst SEND_EX_DESC WRITE_DESC1_WC FLAGS
#define WRITE_SCRATCH2(cdst) \
	send (16) null cdst SEND_EX_DESC WRITE_DESC2 FLAGS
#define WRITE_SCRATCH4(cdst) \
	send (16) null cdst SEND_EX_DESC WRITE_DESC4 FLAGS

/**
 * READ_SCRATCH1 - Read 2 owords.
 * cdst.2 - offset
 * cdst.5 - per thread scratch base, relative to gsba??
 * grf - register where read data is populated.
 */
#define READ_SCRATCH1(grf, cdst) \
	send (16) grf:ud cdst SEND_EX_DESC RECV_DESC1 FLAGS

/**
 * SET_OFFSET - setup mrf for the given offset prior to a send instruction.
 * mrf - message register to be used as the header.
 * offset - offset.
 *
 * Copies g0.5 into mrf.5 and writes offset into mrf.2; no other dword of
 * mrf is touched.
 *
 * If a WRITE_SCRATCH follows, mrf+1 -> mrf+1+n should contain the data to be
 * written.
 */
#define SET_OFFSET(mrf, offset) \
	mov (1) mrf.5:ud g0.5:ud FLAGS; \
	mov (1) mrf.2:ud offset:ud FLAGS

/**
 * SAVE_CRF - save the control register
 *
 * m1 is first filled with 0xdeadbeef, then dwords 0..2 receive cr0.0..cr0.2
 * and dword 3 receives sr0; the result is written at CR_OFFSET.
 *
 * clobbers: m0.2, m0.5, all of m1
 */
#define CR_OFFSET 0x40
#define SAVE_CRF \
	SET_OFFSET(m0, CR_OFFSET); \
	mov (8) m1:ud 0xdeadbeef:ud FLAGS; \
	mov (1) m1.0:ud cr0.0 FLAGS; \
	mov (1) m1.1:ud cr0.1 FLAGS; \
	mov (1) m1.2:ud cr0.2 FLAGS; \
	mov (1) m1.3:ud sr0:ud FLAGS; \
	WRITE_SCRATCH1(m0)

/*
 * STORE_GRF - write grf to scratch at offset, staging the data through m1.
 * clobbers: m0.2, m0.5, all of m1 (overwritten with a copy of grf)
 */
#define STORE_GRF(grf, offset) \
	SET_OFFSET(m0, offset); \
	mov (8) m1:ud grf:ud FLAGS; \
	WRITE_SCRATCH1(m0)

/*
 * LOAD_GRF - read scratch at offset into grf, using m0 as the header.
 * clobbers: m0.2, m0.5 (and grf, which is the destination)
 */
#define LOAD_GRF(grf, offset) \
	SET_OFFSET(m0, offset); \
	READ_SCRATCH1(grf, m0)

/*
 * STORE_MRF - quirky: mrf itself is only used as the message header; per the
 * WRITE_SCRATCH1 contract the register written to scratch is mrf+1.
 * clobbers: mrf.2 mrf.5
 */
#define STORE_MRF(mrf, offset) \
	SET_OFFSET(mrf, offset); \
	WRITE_SCRATCH1(mrf)

/*
 * LOAD_MRF - read scratch at offset into mrf itself (via g1).
 * non-quirky semantics, unlike STORE_MRF
 * clobbers: g1, plus m0.2 and m0.5 through LOAD_GRF
 */
#define LOAD_MRF(mrf, offset) \
	LOAD_GRF(g1, offset); \
	mov (8) mrf:ud g1:ud FLAGS

/*
 * SAVE_ALL_MRF / RESTORE_ALL_MRF
 *
 * Because STORE_MRF(mN, off) writes m(N+1), the save list below stores
 * m2..m15 at offsets 0x2..0x1c; m1 is expected to have been written at
 * offset 0 beforehand (the routine does this with STORE_MRF(m0, 0)).
 * RESTORE_ALL_MRF uses LOAD_MRF, which loads the named register, so its
 * offsets pair up with the save list: m15 <- 0x1c ... m1 <- 0x0.
 * m0 is neither saved nor restored.
 */
#define SAVE_ALL_MRF \
	/* m1 is saved already */ \
	STORE_MRF(m1, 0x2); \
	STORE_MRF(m2, 0x4); \
	STORE_MRF(m3, 0x6); \
	STORE_MRF(m4, 0x8); \
	STORE_MRF(m5, 0xa); \
	STORE_MRF(m6, 0xc); \
	STORE_MRF(m7, 0xe); \
	STORE_MRF(m8, 0x10); \
	STORE_MRF(m9, 0x12); \
	STORE_MRF(m10, 0x14); \
	STORE_MRF(m11, 0x16); \
	STORE_MRF(m12, 0x18); \
	STORE_MRF(m13, 0x1a); \
	STORE_MRF(m14, 0x1c)

#define RESTORE_ALL_MRF \
	LOAD_MRF(m15, 0x1c); \
	LOAD_MRF(m14, 0x1a); \
	LOAD_MRF(m13, 0x18); \
	LOAD_MRF(m12, 0x16); \
	LOAD_MRF(m11, 0x14); \
	LOAD_MRF(m10, 0x12); \
	LOAD_MRF(m9, 0x10); \
	LOAD_MRF(m8, 0xe); \
	LOAD_MRF(m7, 0xc); \
	LOAD_MRF(m6, 0xa); \
	LOAD_MRF(m5, 0x8); \
	LOAD_MRF(m4, 0x6); \
	LOAD_MRF(m3, 0x4); \
	LOAD_MRF(m2, 0x2); \
	LOAD_MRF(m1, 0x0)

#ifndef SANDYBRIDGE
#error Only SandyBridge is supported
#endif

/* Default flags for an instruction */
#define FLAGS { ALIGN1, SWITCH, MASK_DISABLE, ACCWRCTRL}

/*
 * We can clobber m0, and g0.4, everything else must be saved.
 */
Enter:
	nop;

	or (1) cr0.0 cr0.0 CR0_0_ENTRY_UNMASK:ud FLAGS;

/*
 * g0.5 has the per thread scratch space when running in FS or VS.
 * If we don't have a valid g0.5, we can calculate a per thread scratch offset
 * using the system registers. The problem is we do not have a good way to know
 * the offset from GSBA. The system routine will have to be hardcoded or
 * dynamically patched with the correct offset.
 * TID is in sr0.0[2:0]
 * EUID is in sr0.0[11:8]
 *
 * The enabled (#else) sequence leaves, in g0.4:ud,
 *   ((sr0.0:uw >> 5) + (sr0.0:uw & 0x7)) * PER_THREAD_QWORDS
 * assuming g0.8:uw / g0.9:uw are the low / high words of g0.4:ud.
 * NOTE(review): the result lands in g0.4, but SET_OFFSET reads g0.5 as the
 * scratch base -- confirm which one the GPGPU path is meant to use.
 */
#ifdef GPGPU
	mov (1) g0.4:ud 0:ud FLAGS;
#if 0
	/* This should work according to the docs, the add blows up */
	shr (1) g0.8:uw sr0.0:uw 5 FLAGS;
	add (1) g0.16:ub gr0.16:ub sr0.0:ub FLAGS;
#else
	shr (1) g0.8:uw sr0.0:uw 5 FLAGS;
	mov (1) g0.9:uw sr0.0:uw FLAGS;
	and (1) g0.9:uw g0.9:uw 0x7:uw FLAGS;
	add (1) g0.8:uw g0.8:uw g0.9:uw FLAGS;
	mov (1) g0.9:uw 0:uw FLAGS;
	mul (1) g0.4:ud g0.4:ud PER_THREAD_QWORDS FLAGS;
#endif
#endif

	mov (8) m0:ud 0:ud FLAGS;

	/* Saves must occur in order so as not to clobber the next register */
	/* STORE_MRF(m0, 0) writes m1 at offset 0; later STORE_GRFs reuse m1. */
	STORE_MRF(m0, 0);
	STORE_GRF(g0, 0x20);
	STORE_GRF(g1, 0x22);
	SAVE_ALL_MRF;

	/* g1 is free to use from here: it was saved at 0x22 above. */
	mov (8) g1:ud STATE_EU_MSG:ud FLAGS;
	STORE_GRF(g1, STATE_QWORD);

	mov (8) g1:ud DEBUG_PROTOCOL_VERSION:ud FLAGS;
	STORE_GRF(g1, COMMUNICATION_QWORD);

	SAVE_CRF;

	EVICT_CACHE;
	wait n1:ud;
	EVICT_CACHE;

	/* Using this to try to keep coherency */
	LOAD_GRF(g1, CR_OFFSET);
	LOAD_GRF(g1, COMMUNICATION_QWORD);
	LOAD_GRF(g1, STATE_QWORD);

	/* Restore in the reverse order of the saves; g1 and g0 go last. */
	RESTORE_ALL_MRF;
	LOAD_GRF(g1, 0x22);
	LOAD_GRF(g0, 0x20);

	/* Clear breakpoint status */
	and (1) cr0.1 cr0.1 CR0_1_ENTRY_UNMASK:ud FLAGS;

	/* set breakpoint suppress this should be conditional on bes */
	or (1) cr0.0 cr0.0 CR0_0_BP_SUPPRESS:ud FLAGS;

	and (1) cr0.0 cr0.0 CR0_0_RETURN_MASK:ud FLAGS;
	nop;