nouveau: add ctxprogs generation for nv50/nv8x/nv9x This implements ctxprogs generation for nv50/nv8x/nv9x. The code seems to be working but needs cleanup before it will be committed upstream. This also fixes upstream bug #23198: nv50/NVS135M: video hangs/flickers when fullscreen Module option ctxfw=1 can be used to disable the generator. This version is http://0x04.net/~mwk/gen.diff [23-Jan-2010] The author of the patch is Marcin Kościelnicki <koriakin@0x04.net>. Signed-off-by: Anssi Hannula <anssi@mandriva.org> diff --git a/drivers/gpu/drm/nouveau/Makefile b/drivers/gpu/drm/nouveau/Makefile index 48c290b..32db806 100644 --- a/drivers/gpu/drm/nouveau/Makefile +++ b/drivers/gpu/drm/nouveau/Makefile @@ -16,7 +16,7 @@ nouveau-y := nouveau_drv.o nouveau_state.o nouveau_channel.o nouveau_mem.o \ nv04_fifo.o nv10_fifo.o nv40_fifo.o nv50_fifo.o \ nv04_graph.o nv10_graph.o nv20_graph.o \ nv40_graph.o nv50_graph.o \ - nv40_grctx.o \ + nv40_grctx.o nv50_grctx.o \ nv04_instmem.o nv50_instmem.o \ nv50_crtc.o nv50_dac.o nv50_sor.o \ nv50_cursor.o nv50_display.o nv50_fbcon.o \ diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index 6b96904..bdc940e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -1024,6 +1024,7 @@ extern void nv50_graph_destroy_context(struct nouveau_channel *); extern int nv50_graph_load_context(struct nouveau_channel *); extern int nv50_graph_unload_context(struct drm_device *); extern void nv50_graph_context_switch(struct drm_device *); +extern void nv50_grctx_init(struct nouveau_grctx *); /* nouveau_grctx.c */ extern int nouveau_grctx_prog_load(struct drm_device *); diff --git a/drivers/gpu/drm/nouveau/nv50_graph.c b/drivers/gpu/drm/nouveau/nv50_graph.c index 20319e5..3aaead1 100644 --- a/drivers/gpu/drm/nouveau/nv50_graph.c +++ b/drivers/gpu/drm/nouveau/nv50_graph.c @@ -28,20 +28,8 @@ #include "drm.h" #include "nouveau_drv.h" -MODULE_FIRMWARE("nouveau/nv50.ctxprog"); 
-MODULE_FIRMWARE("nouveau/nv50.ctxvals"); -MODULE_FIRMWARE("nouveau/nv84.ctxprog"); -MODULE_FIRMWARE("nouveau/nv84.ctxvals"); -MODULE_FIRMWARE("nouveau/nv86.ctxprog"); -MODULE_FIRMWARE("nouveau/nv86.ctxvals"); -MODULE_FIRMWARE("nouveau/nv92.ctxprog"); -MODULE_FIRMWARE("nouveau/nv92.ctxvals"); -MODULE_FIRMWARE("nouveau/nv94.ctxprog"); -MODULE_FIRMWARE("nouveau/nv94.ctxvals"); -MODULE_FIRMWARE("nouveau/nv96.ctxprog"); -MODULE_FIRMWARE("nouveau/nv96.ctxvals"); -MODULE_FIRMWARE("nouveau/nv98.ctxprog"); -MODULE_FIRMWARE("nouveau/nv98.ctxvals"); +#include "nouveau_grctx.h" + MODULE_FIRMWARE("nouveau/nva0.ctxprog"); MODULE_FIRMWARE("nouveau/nva0.ctxvals"); MODULE_FIRMWARE("nouveau/nva5.ctxprog"); @@ -111,9 +99,32 @@ nv50_graph_init_ctxctl(struct drm_device *dev) NV_DEBUG(dev, "\n"); - nouveau_grctx_prog_load(dev); - if (!dev_priv->engine.graph.ctxprog) - dev_priv->engine.graph.accel_blocked = true; + /* no NVAx yet */ + if (nouveau_ctxfw || (dev_priv->chipset & 0xf0) == 0xa0) { + nouveau_grctx_prog_load(dev); + dev_priv->engine.graph.grctx_size = 0x70000; + } + if (!dev_priv->engine.graph.ctxprog) { + if ((dev_priv->chipset & 0xf0) == 0xa0) { + /* :( */ + dev_priv->engine.graph.accel_blocked = true; + } else { + struct nouveau_grctx ctx = {}; + uint32_t cp[512]; + int i; + + ctx.dev = dev; + ctx.mode = NOUVEAU_GRCTX_PROG; + ctx.data = cp; + ctx.ctxprog_max = 512; + nv50_grctx_init(&ctx); + dev_priv->engine.graph.grctx_size = ctx.ctxvals_pos * 4; + + nv_wr32(dev, NV40_PGRAPH_CTXCTL_UCODE_INDEX, 0); + for (i = 0; i < ctx.ctxprog_len; i++) + nv_wr32(dev, NV40_PGRAPH_CTXCTL_UCODE_DATA, cp[i]); + } + } nv_wr32(dev, 0x400320, 4); nv_wr32(dev, NV40_PGRAPH_CTXCTL_CUR, 0); @@ -187,13 +198,13 @@ nv50_graph_create_context(struct nouveau_channel *chan) struct drm_nouveau_private *dev_priv = dev->dev_private; struct nouveau_gpuobj *ramin = chan->ramin->gpuobj; struct nouveau_gpuobj *ctx; - uint32_t grctx_size = 0x70000; + struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph; 
int hdr, ret; NV_DEBUG(dev, "ch%d\n", chan->id); - ret = nouveau_gpuobj_new_ref(dev, chan, NULL, 0, grctx_size, 0x1000, - NVOBJ_FLAG_ZERO_ALLOC | + ret = nouveau_gpuobj_new_ref(dev, chan, NULL, 0, pgraph->grctx_size, + 0x1000, NVOBJ_FLAG_ZERO_ALLOC | NVOBJ_FLAG_ZERO_FREE, &chan->ramin_grctx); if (ret) return ret; @@ -203,7 +214,7 @@ nv50_graph_create_context(struct nouveau_channel *chan) dev_priv->engine.instmem.prepare_access(dev, true); nv_wo32(dev, ramin, (hdr + 0x00)/4, 0x00190002); nv_wo32(dev, ramin, (hdr + 0x04)/4, chan->ramin_grctx->instance + - grctx_size - 1); + pgraph->grctx_size - 1); nv_wo32(dev, ramin, (hdr + 0x08)/4, chan->ramin_grctx->instance); nv_wo32(dev, ramin, (hdr + 0x0c)/4, 0); nv_wo32(dev, ramin, (hdr + 0x10)/4, 0); @@ -211,7 +222,15 @@ nv50_graph_create_context(struct nouveau_channel *chan) dev_priv->engine.instmem.finish_access(dev); dev_priv->engine.instmem.prepare_access(dev, true); - nouveau_grctx_vals_load(dev, ctx); + if (!pgraph->ctxprog) { + struct nouveau_grctx ctx = {}; + ctx.dev = chan->dev; + ctx.mode = NOUVEAU_GRCTX_VALS; + ctx.data = chan->ramin_grctx->gpuobj; + nv50_grctx_init(&ctx); + } else { + nouveau_grctx_vals_load(dev, ctx); + } nv_wo32(dev, ctx, 0x00000/4, chan->ramin->instance >> 12); if ((dev_priv->chipset & 0xf0) == 0xa0) nv_wo32(dev, ctx, 0x00004/4, 0x00000000); diff --git a/drivers/gpu/drm/nouveau/nv50_grctx.c b/drivers/gpu/drm/nouveau/nv50_grctx.c new file mode 100644 index 0000000..49dc0f8 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nv50_grctx.c @@ -0,0 +1,1455 @@ +/* + * Copyright 2009 Red Hat Inc., Marcin Kościelnicki + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * 
Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Ben Skeggs + */ + +#define CP_FLAG_CLEAR 0 +#define CP_FLAG_SET 1 +#define CP_FLAG_SWAP_DIRECTION ((0 * 32) + 0) +#define CP_FLAG_SWAP_DIRECTION_LOAD 0 +#define CP_FLAG_SWAP_DIRECTION_SAVE 1 +#define CP_FLAG_UNK01 ((0 * 32) + 1) +#define CP_FLAG_UNK01_CLEAR 0 +#define CP_FLAG_UNK01_SET 1 +#define CP_FLAG_UNK03 ((0 * 32) + 3) +#define CP_FLAG_UNK03_CLEAR 0 +#define CP_FLAG_UNK03_SET 1 +#define CP_FLAG_USER_SAVE ((0 * 32) + 5) +#define CP_FLAG_USER_SAVE_NOT_PENDING 0 +#define CP_FLAG_USER_SAVE_PENDING 1 +#define CP_FLAG_USER_LOAD ((0 * 32) + 6) +#define CP_FLAG_USER_LOAD_NOT_PENDING 0 +#define CP_FLAG_USER_LOAD_PENDING 1 +#define CP_FLAG_UNK0B ((0 * 32) + 0xb) +#define CP_FLAG_UNK0B_CLEAR 0 +#define CP_FLAG_UNK0B_SET 1 +#define CP_FLAG_UNK1D ((0 * 32) + 0x1d) +#define CP_FLAG_UNK1D_CLEAR 0 +#define CP_FLAG_UNK1D_SET 1 +#define CP_FLAG_UNK20 ((1 * 32) + 0) +#define CP_FLAG_UNK20_CLEAR 0 +#define CP_FLAG_UNK20_SET 1 +#define CP_FLAG_UNK40 ((2 * 32) + 0) +#define CP_FLAG_UNK40_CLEAR 0 +#define CP_FLAG_UNK40_SET 1 +#define CP_FLAG_AUTO_SAVE ((2 * 32) + 4) +#define CP_FLAG_AUTO_SAVE_NOT_PENDING 0 +#define CP_FLAG_AUTO_SAVE_PENDING 1 +#define CP_FLAG_AUTO_LOAD ((2 * 32) + 5) +#define CP_FLAG_AUTO_LOAD_NOT_PENDING 0 +#define CP_FLAG_AUTO_LOAD_PENDING 1 +#define 
CP_FLAG_XFER ((2 * 32) + 11) +#define CP_FLAG_XFER_IDLE 0 +#define CP_FLAG_XFER_BUSY 1 +#define CP_FLAG_NEWCTX ((2 * 32) + 12) +#define CP_FLAG_NEWCTX_BUSY 0 +#define CP_FLAG_NEWCTX_DONE 1 +#define CP_FLAG_ALWAYS ((2 * 32) + 13) +#define CP_FLAG_ALWAYS_FALSE 0 +#define CP_FLAG_ALWAYS_TRUE 1 + +#define CP_CTX 0x00100000 +#define CP_CTX_COUNT 0x000f0000 +#define CP_CTX_COUNT_SHIFT 16 +#define CP_CTX_REG 0x00003fff +#define CP_LOAD_SR 0x00200000 +#define CP_LOAD_SR_VALUE 0x000fffff +#define CP_BRA 0x00400000 +#define CP_BRA_IP 0x0001ff00 +#define CP_BRA_IP_SHIFT 8 +#define CP_BRA_IF_CLEAR 0x00000080 +#define CP_BRA_FLAG 0x0000007f +#define CP_WAIT 0x00500000 +#define CP_WAIT_SET 0x00000080 +#define CP_WAIT_FLAG 0x0000007f +#define CP_SET 0x00700000 +#define CP_SET_1 0x00000080 +#define CP_SET_FLAG 0x0000007f +#define CP_NEWCTX 0x00600004 +#define CP_NEXT_TO_SWAP 0x00600005 +#define CP_SET_CONTEXT_POINTER 0x00600006 +#define CP_SET_XFER_POINTER 0x00600007 +#define CP_ENABLE 0x00600009 +#define CP_END 0x0060000c +#define CP_NEXT_TO_CURRENT 0x0060000d +#define CP_DISABLE1 0x0090ffff +#define CP_DISABLE2 0x0091ffff +#define CP_XFER_MAGIC_UNK01 0x008000ff +#define CP_XFER_MAGIC_UNK02 0x008800ff +#define CP_SEEK_MAGIC_UNK01 0x00c000ff +#define CP_SEEK_MAGIC_UNK02 0x00c800ff + +#include "drmP.h" +#include "nouveau_drv.h" +#include "nouveau_grctx.h" + +/* TODO: + * - document unimplemented bits compared to nvidia + * - use 0x4008fc instead of 0x1540? 
+ */ + +enum cp_label { + cp_check_load = 1, + cp_setup_auto_load, + cp_setup_load, + cp_setup_save, + cp_swap_state, + cp_prepare_exit, + cp_exit, +}; + +/* + * non-trivial demagiced parts of ctx init go here + */ + +static void +nv50_graph_construct_xfer_m2mf(struct nouveau_grctx *ctx, int offset) +{ + /* m2mf state */ + /* 00: DMA_NOTIFY instance >> 4 */ + /* 01: DMA_BUFFER_IN instance >> 4 */ + /* 02: DMA_BUFFER_OUT instance >> 4 */ + /* 03: OFFSET_IN */ + /* 04: OFFSET_OUT */ + /* 05: PITCH_IN */ + /* 06: PITCH_OUT */ + /* 07: LINE_LENGTH */ + /* 08: LINE_COUNT */ + nv_wo32(ctx->dev, ctx->data, offset + 0x48, 0x00000021); /* 09: FORMAT: bits 0-4 INPUT_INC, bits 5-9 OUTPUT_INC */ + nv_wo32(ctx->dev, ctx->data, offset + 0x50, 0x00000001); /* 0a: LINEAR_IN */ + nv_wo32(ctx->dev, ctx->data, offset + 0x58, 0x00000002); /* 0b: TILING_MODE_IN: bits 0-2 y tiling, bits 3-5 z tiling */ + nv_wo32(ctx->dev, ctx->data, offset + 0x60, 0x00000100); /* 0c: TILING_PITCH_IN */ + nv_wo32(ctx->dev, ctx->data, offset + 0x68, 0x00000100); /* 0d: TILING_HEIGHT_IN */ + nv_wo32(ctx->dev, ctx->data, offset + 0x70, 0x00000001); /* 0e: TILING_DEPTH_IN */ + /* 0f: TILING_POSITION_IN_Z */ + /* 10: TILING_POSITION_IN */ + nv_wo32(ctx->dev, ctx->data, offset + 0x88, 0x00000001); /* 11: LINEAR_OUT */ + nv_wo32(ctx->dev, ctx->data, offset + 0x90, 0x00000002); /* 12: TILING_MODE_OUT: bits 0-2 y tiling, bits 3-5 z tiling */ + nv_wo32(ctx->dev, ctx->data, offset + 0x98, 0x00000100); /* 13: TILING_PITCH_OUT */ + nv_wo32(ctx->dev, ctx->data, offset + 0xa0, 0x00000100); /* 14: TILING_HEIGHT_OUT */ + nv_wo32(ctx->dev, ctx->data, offset + 0xa8, 0x00000001); /* 15: TILING_DEPTH_OUT */ + /* 16: TILING_POSITION_OUT_Z */ + /* 17: TILING_POSITION_OUT */ + /* 18: OFFSET_IN_HIGH */ + /* 19: OFFSET_OUT_HIGH */ +} + +static void +nv50_graph_construct_general(struct nouveau_grctx *ctx) +{ + struct drm_nouveau_private *dev_priv = ctx->dev->dev_private; + int i, j; + int offset; + uint32_t units = nv_rd32 
(ctx->dev, 0x1540); + + cp_ctx(ctx, 0x400808, 7); + gr_def(ctx, 0x400814, 0x00000030); + cp_ctx(ctx, 0x400828, 1); + cp_ctx(ctx, 0x400834, 0x32); + if (dev_priv->chipset == 0x50) { + gr_def(ctx, 0x400834, 0xff400040); + gr_def(ctx, 0x400838, 0xfff00080); + gr_def(ctx, 0x40083c, 0xfff70090); + gr_def(ctx, 0x400840, 0xffe806a8); + } + gr_def(ctx, 0x400844, 0x00000002); + gr_def(ctx, 0x4008e8, 0x00000003); + gr_def(ctx, 0x4008ec, 0x00001000); + if (dev_priv->chipset == 0x50) + cp_ctx(ctx, 0x400908, 0xb); + else + cp_ctx(ctx, 0x400908, 0xc); + cp_ctx(ctx, 0x400c08, 0x2); + gr_def(ctx, 0x400c08, 0x0000fe0c); + cp_ctx(ctx, 0x401008, 0x4); + gr_def(ctx, 0x401014, 0x00001000); + cp_ctx(ctx, 0x401400, 0x8); + cp_ctx(ctx, 0x401424, 0x3); + if (dev_priv->chipset == 0x50) + gr_def(ctx, 0x40142c, 0x0001fd87); + else + gr_def(ctx, 0x40142c, 0x00000187); + cp_ctx(ctx, 0x401540, 0x5); + gr_def(ctx, 0x401550, 0x00001018); + cp_ctx(ctx, 0x401814, 0x1); + gr_def(ctx, 0x401814, 0x000000ff); + if (dev_priv->chipset == 0x50) { + cp_ctx(ctx, 0x40181c, 0xe); + gr_def(ctx, 0x401850, 0x00000004); + } else { + cp_ctx(ctx, 0x40181c, 0xf); + gr_def(ctx, 0x401854, 0x00000004); + } + cp_ctx(ctx, 0x401c00, 0x1); + switch (dev_priv->chipset) { + case 0x50: + gr_def(ctx, 0x401c00, 0x0001005f); + break; + case 0x84: + case 0x86: + case 0x94: + gr_def(ctx, 0x401c00, 0x044d00df); + break; + case 0x92: + case 0x96: + case 0x98: + gr_def(ctx, 0x401c00, 0x042500df); + break; + } + cp_ctx(ctx, 0x402400, 0x1); + if (dev_priv->chipset == 0x50) + cp_ctx(ctx, 0x402408, 0x1); + else + cp_ctx(ctx, 0x402408, 0x2); + gr_def(ctx, 0x402408, 0x00000600); + cp_ctx(ctx, 0x402800, 0x1); + if (dev_priv->chipset == 0x50) + gr_def(ctx, 0x402800, 0x00000006); + cp_ctx(ctx, 0x402c08, 0x6); + if (dev_priv->chipset != 0x50) + gr_def(ctx, 0x402c14, 0x01000000); + gr_def(ctx, 0x402c18, 0x000000ff); + if (dev_priv->chipset == 0x50) + cp_ctx(ctx, 0x402ca0, 0x1); + else + cp_ctx(ctx, 0x402ca0, 0x2); + gr_def(ctx, 0x402ca0, 
0x00000400); + cp_ctx(ctx, 0x402cac, 0x4); + cp_ctx(ctx, 0x403004, 0x1); + gr_def(ctx, 0x403004, 0x00000001); + cp_ctx(ctx, 0x405000, 0x1); + if (dev_priv->chipset == 0x50) + gr_def(ctx, 0x405000, 0x00300080); + else if (dev_priv->chipset == 0x84) + gr_def(ctx, 0x405000, 0x000e0080); + else + gr_def(ctx, 0x405000, 0x00000080); + cp_ctx(ctx, 0x405014, 0x1); + gr_def(ctx, 0x405014, 0x00000004); + cp_ctx(ctx, 0x40501c, 0x1); + cp_ctx(ctx, 0x405024, 0x1); + cp_ctx(ctx, 0x40502c, 0x1); + if (dev_priv->chipset >= 0x94) { + cp_ctx(ctx, 0x405400, 0xcc); + offset = 4; + } else if (dev_priv->chipset != 0x50) { + cp_ctx(ctx, 0x405400, 0xcb); + offset = 0; + } else { + cp_ctx(ctx, 0x405400, 0xea); + offset = 0; + } + gr_def(ctx, 0x40540c, 0x00000002); + gr_def(ctx, 0x405410, 0x00000001); + + gr_def(ctx, 0x40541c + offset, 0x00000001); + gr_def(ctx, 0x405420 + offset, 0x00000100); + gr_def(ctx, 0x405438 + offset, 0x00000002); + gr_def(ctx, 0x40543c + offset, 0x00000001); + gr_def(ctx, 0x405440 + offset, 0x00000001); + gr_def(ctx, 0x405450 + offset, 0x00000001); + gr_def(ctx, 0x405454 + offset, 0x003fffff); + gr_def(ctx, 0x405458 + offset, 0x00001fff); + gr_def(ctx, 0x405460 + offset, 0x00000001); + gr_def(ctx, 0x405464 + offset, 0x00000001); + gr_def(ctx, 0x40546c + offset, 0x00000001); + gr_def(ctx, 0x405470 + offset, 0x00000001); + gr_def(ctx, 0x405474 + offset, 0x00000001); + gr_def(ctx, 0x405478 + offset, 0x00000004); + gr_def(ctx, 0x40547c + offset, 0x00000001); + gr_def(ctx, 0x405480 + offset, 0x00000001); + gr_def(ctx, 0x405484 + offset, 0x00000001); + gr_def(ctx, 0x405488 + offset, 0x00000007); + gr_def(ctx, 0x40548c + offset, 0x00000001); + gr_def(ctx, 0x405490 + offset, 0x00000007); + gr_def(ctx, 0x405494 + offset, 0x00000001); + gr_def(ctx, 0x405498 + offset, 0x00000001); + gr_def(ctx, 0x40549c + offset, 0x00000001); + if (dev_priv->chipset == 0x50) { + gr_def(ctx, 0x4054b0, 0x00000001); + gr_def(ctx, 0x4054b4, 0x00000001); + gr_def(ctx, 0x4054bc, 0x00000001); + 
gr_def(ctx, 0x4054c0, 0x0000000a); + gr_def(ctx, 0x4054d0, 0x00000040); + gr_def(ctx, 0x4054d8, 0x00000002); + gr_def(ctx, 0x4054dc, 0x00000100); + gr_def(ctx, 0x4054e0, 0x00000001); + gr_def(ctx, 0x4054e4, 0x00000100); + gr_def(ctx, 0x405500, 0x00000001); + gr_def(ctx, 0x405524, 0x00000004); + gr_def(ctx, 0x40553c, 0x00000001); + gr_def(ctx, 0x405540, 0x00000100); + gr_def(ctx, 0x405548, 0x00000001); + gr_def(ctx, 0x405554, 0x00000100); + gr_def(ctx, 0x405558, 0x00000001); + gr_def(ctx, 0x40555c, 0x00000100); + gr_def(ctx, 0x405564, 0x00000001); + gr_def(ctx, 0x405570, 0x00000100); + gr_def(ctx, 0x405574, 0x00000001); + gr_def(ctx, 0x40557c, 0x00000001); + gr_def(ctx, 0x405588, 0x00000002); + gr_def(ctx, 0x405590, 0x00000001); + gr_def(ctx, 0x405598, 0x00000001); + gr_def(ctx, 0x4055ac, 0x00000003); + offset = 0xd0; + } else { + gr_def(ctx, 0x4054b0 + offset, 0x00000001); + gr_def(ctx, 0x4054b4 + offset, 0x00000100); + gr_def(ctx, 0x4054bc + offset, 0x00000001); + gr_def(ctx, 0x4054c8 + offset, 0x00000100); + gr_def(ctx, 0x4054cc + offset, 0x00000001); + gr_def(ctx, 0x4054d0 + offset, 0x00000100); + gr_def(ctx, 0x4054d8 + offset, 0x00000001); + gr_def(ctx, 0x4054e4 + offset, 0x00000100); + } + gr_def(ctx, 0x4054f8 + offset, 0x00000004); + gr_def(ctx, 0x4054fc + offset, 0x00000070); + gr_def(ctx, 0x405500 + offset, 0x00000080); + gr_def(ctx, 0x405514 + offset, 0x0000000c); + if (dev_priv->chipset == 0x50) + offset = 0xcc; + gr_def(ctx, 0x40551c + offset, 0x00000008); + gr_def(ctx, 0x405520 + offset, 0x00000014); + if (dev_priv->chipset == 0x50) { + gr_def(ctx, 0x4055f0, 0x00000026); + offset = 0xb4; + } else { + gr_def(ctx, 0x405528 + offset, 0x00000029); + gr_def(ctx, 0x40552c + offset, 0x00000027); + gr_def(ctx, 0x405530 + offset, 0x00000026); + gr_def(ctx, 0x405534 + offset, 0x00000008); + gr_def(ctx, 0x405538 + offset, 0x00000004); + gr_def(ctx, 0x40553c + offset, 0x00000027); + } + gr_def(ctx, 0x405548 + offset, 0x00000001); + gr_def(ctx, 0x40554c + offset, 
0x00000002); + gr_def(ctx, 0x405550 + offset, 0x00000003); + gr_def(ctx, 0x405554 + offset, 0x00000004); + gr_def(ctx, 0x405558 + offset, 0x00000005); + gr_def(ctx, 0x40555c + offset, 0x00000006); + gr_def(ctx, 0x405560 + offset, 0x00000007); + gr_def(ctx, 0x405564 + offset, 0x00000001); + gr_def(ctx, 0x4055a8 + offset, 0x000000cf); + if (dev_priv->chipset == 0x50) + offset = 0xb0; + gr_def(ctx, 0x4055d8 + offset, 0x00000080); + gr_def(ctx, 0x4055dc + offset, 0x00000004); + gr_def(ctx, 0x4055e0 + offset, 0x00000004); + if (dev_priv->chipset == 0x50) + offset = 0xac; + else + gr_def(ctx, 0x4055e4 + offset, 0x00000003); + gr_def(ctx, 0x4055e8 + offset, 0x00000001); + if (dev_priv->chipset == 0x50) + offset = 0xa8; + gr_def(ctx, 0x4055f4 + offset, 0x00000012); + gr_def(ctx, 0x4055f8 + offset, 0x00000010); + gr_def(ctx, 0x4055fc + offset, 0x0000000c); + gr_def(ctx, 0x405600 + offset, 0x00000001); + gr_def(ctx, 0x405610 + offset, 0x00000004); + gr_def(ctx, 0x405614 + offset, 0x00000002); + gr_def(ctx, 0x405618 + offset, 0x00000004); + gr_def(ctx, 0x405624 + offset, 0x003fffff); + gr_def(ctx, 0x405628 + offset, 0x00001fff); + if (dev_priv->chipset == 0x50) + offset = 0x88; + gr_def(ctx, 0x405650 + offset, 0x00000004); + gr_def(ctx, 0x405654 + offset, 0x00000014); + gr_def(ctx, 0x405658 + offset, 0x00000001); + gr_def(ctx, 0x405664 + offset, 0x00000002); + gr_def(ctx, 0x405670 + offset, 0x00000001); + gr_def(ctx, 0x405678 + offset, 0x00000002); + gr_def(ctx, 0x40567c + offset, 0x00001000); + if (dev_priv->chipset == 0x50) + offset = 0x7c; + else { + gr_def(ctx, 0x405680 + offset, 0x00000e00); + gr_def(ctx, 0x405684 + offset, 0x00001000); + gr_def(ctx, 0x405688 + offset, 0x00001e00); + } + gr_def(ctx, 0x405690 + offset, 0x00000001); + gr_def(ctx, 0x405694 + offset, 0x00000001); + gr_def(ctx, 0x405698 + offset, 0x00000001); + gr_def(ctx, 0x40569c + offset, 0x00000001); + gr_def(ctx, 0x4056a0 + offset, 0x00000001); + gr_def(ctx, 0x4056b0 + offset, 0x00000200); + gr_def(ctx, 
0x4056b8 + offset, 0x00000001); + gr_def(ctx, 0x4056bc + offset, 0x00000070); + gr_def(ctx, 0x4056c0 + offset, 0x00000080); + gr_def(ctx, 0x4056cc + offset, 0x00000001); + gr_def(ctx, 0x4056d0 + offset, 0x00000070); + gr_def(ctx, 0x4056d4 + offset, 0x00000080); + gr_def(ctx, 0x4056e4 + offset, 0x00000001); + gr_def(ctx, 0x4056e8 + offset, 0x000000cf); + gr_def(ctx, 0x4056f0 + offset, 0x00000001); + gr_def(ctx, 0x405700 + offset, 0x000000cf); + gr_def(ctx, 0x405708 + offset, 0x00000002); + gr_def(ctx, 0x405710 + offset, 0x00000001); + gr_def(ctx, 0x405718 + offset, 0x00000001); + gr_def(ctx, 0x405720 + offset, 0x000000cf); + gr_def(ctx, 0x405724 + offset, 0x000000cf); + gr_def(ctx, 0x405728 + offset, 0x00000001); + + if (dev_priv->chipset == 0x50) + cp_ctx(ctx, 0x4063e0, 0x1); + + if (dev_priv->chipset < 0x90) { + cp_ctx(ctx, 0x406814, 0x2b); + gr_def(ctx, 0x406818, 0x00000f80); + gr_def(ctx, 0x406860, 0x007f0080); + gr_def(ctx, 0x40689c, 0x007f0080); + } else { + cp_ctx(ctx, 0x406814, 0x4); + if (dev_priv->chipset == 0x98) { + gr_def(ctx, 0x406818, 0x00000f80); + } else { + gr_def(ctx, 0x406818, 0x00001f80); + } + cp_ctx(ctx, 0x406830, 0x3); + } + /* per-ROP group state */ + for (i = 0; i < 8; i++) { + if (units & (1<<(i+16))) { + cp_ctx(ctx, 0x407000 + (i<<8), 3); + if (dev_priv->chipset == 0x50) + gr_def(ctx, 0x407000 + (i<<8), 0x1b74f820); + else + gr_def(ctx, 0x407000 + (i<<8), 0x3b74f821); + gr_def(ctx, 0x407004 + (i<<8), 0x89058001); + if (dev_priv->chipset == 0x50) { + cp_ctx(ctx, 0x407010 + (i<<8), 1); + } else { + cp_ctx(ctx, 0x407010 + (i<<8), 2); + gr_def(ctx, 0x407010 + (i<<8), 0x00001000); + gr_def(ctx, 0x407014 + (i<<8), 0x0000001f); + } + cp_ctx(ctx, 0x407080 + (i<<8), 4); + gr_def(ctx, 0x407080 + (i<<8), 0x027c10fa); + if (dev_priv->chipset == 0x50) + gr_def(ctx, 0x407084 + (i<<8), 0x000000c0); + else + gr_def(ctx, 0x407084 + (i<<8), 0x400000c0); + gr_def(ctx, 0x407088 + (i<<8), 0xb7892080); + cp_ctx(ctx, 0x407094 + (i<<8), 1); + } + } + cp_ctx(ctx, 
0x407c00, 0x3); + if (dev_priv->chipset >= 0x90) + gr_def(ctx, 0x407c00, 0x00390040); + else + gr_def(ctx, 0x407c00, 0x00010040); + gr_def(ctx, 0x407c08, 0x00000022); + cp_ctx(ctx, 0x407d00, 0x9); + if (dev_priv->chipset == 0x98) + gr_def(ctx, 0x407d08, 0x00380040); + else { + if (dev_priv->chipset >= 0x90) + gr_def(ctx, 0x407d08, 0x00390040); + else + gr_def(ctx, 0x407d08, 0x00010040); + gr_def(ctx, 0x407d0c, 0x00000022); + } + /* per-TP state */ + for (i = 0; i < 8; i++) { + if (units & (1<<i)) { + cp_ctx(ctx, 0x408200 + (i<<12), 0x44); + /* per-MP state */ + for (j = 0; j < 2; j++) { + gr_def(ctx, 0x408200 + (i<<12) + (j<<7), 0x01800000); + gr_def(ctx, 0x408204 + (i<<12) + (j<<7), 0x00160000); + gr_def(ctx, 0x408208 + (i<<12) + (j<<7), 0x01800000); + gr_def(ctx, 0x408218 + (i<<12) + (j<<7), 0x0003ffff); + switch (dev_priv->chipset) { + case 0x50: + gr_def(ctx, 0x40821c + (i<<12) + (j<<7), 0x00080000); + break; + case 0x84: + gr_def(ctx, 0x40821c + (i<<12) + (j<<7), 0x00880000); + break; + case 0x86: + gr_def(ctx, 0x40821c + (i<<12) + (j<<7), 0x008c0000); + break; + case 0x92: + case 0x96: + case 0x98: + gr_def(ctx, 0x40821c + (i<<12) + (j<<7), 0x118c0000); + break; + case 0x94: + gr_def(ctx, 0x40821c + (i<<12) + (j<<7), 0x10880000); + break; + } + gr_def(ctx, 0x408240 + (i<<12) + (j<<7), 0x00010401); + if (dev_priv->chipset == 0x50) + gr_def(ctx, 0x408248 + (i<<12) + (j<<7), 0x00000040); + else + gr_def(ctx, 0x408248 + (i<<12) + (j<<7), 0x00000078); + gr_def(ctx, 0x408250 + (i<<12) + (j<<7), 0x000000bf); + gr_def(ctx, 0x408258 + (i<<12) + (j<<7), 0x00001210); + if (dev_priv->chipset == 0x50) + gr_def(ctx, 0x40825c + (i<<12) + (j<<7), 0x00000080); + else + gr_def(ctx, 0x40825c + (i<<12) + (j<<7), 0x08000080); + } + if (dev_priv->chipset == 0x50) + gr_def(ctx, 0x408304 + (i<<12), 0x00007070); + else + gr_def(ctx, 0x408304 + (i<<12), 0x00027070); + cp_ctx(ctx, 0x408318 + (i<<12), 1); + if (dev_priv->chipset == 0x50) + gr_def(ctx, 0x408318 + (i<<12), 0x0003ffff); + 
else + gr_def(ctx, 0x408318 + (i<<12), 0x03ffffff); + cp_ctx(ctx, 0x408324 + (i<<12), 5); + cp_ctx(ctx, 0x408340 + (i<<12), 9); + gr_def(ctx, 0x408340 + (i<<12), 0x00120407); + gr_def(ctx, 0x408344 + (i<<12), 0x05091507); + if (dev_priv->chipset == 0x84) + gr_def(ctx, 0x408348 + (i<<12), 0x05100202); + else + gr_def(ctx, 0x408348 + (i<<12), 0x05010202); + gr_def(ctx, 0x40834c + (i<<12), 0x00030201); + cp_ctx(ctx, 0x408400 + (i<<12), 2); + gr_def(ctx, 0x408404 + (i<<12), 0x00000040); + cp_ctx(ctx, 0x40840c + (i<<12), 2); + gr_def(ctx, 0x40840c + (i<<12), 0x0d0c0b0a); + gr_def(ctx, 0x408410 + (i<<12), 0x00141210); + cp_ctx(ctx, 0x408800 + (i<<12), 6); + gr_def(ctx, 0x408800 + (i<<12), 0x000001f0); + gr_def(ctx, 0x408804 + (i<<12), 0x00000001); + gr_def(ctx, 0x408808 + (i<<12), 0x00000003); + if (dev_priv->chipset == 0x50) + gr_def(ctx, 0x40880c + (i<<12), 0x00008000); + gr_def(ctx, 0x408814 + (i<<12), 0x00039e00); + cp_ctx(ctx, 0x40881c + (i<<12), 2); + if (dev_priv->chipset == 0x50) + gr_def(ctx, 0x40881c + (i<<12), 0x00000040); + else + gr_def(ctx, 0x40881c + (i<<12), 0x00000100); + gr_def(ctx, 0x408820 + (i<<12), 0x00003800); + cp_ctx(ctx, 0x408a00 + (i<<12), 1); + gr_def(ctx, 0x408a00 + (i<<12), 0x00404040); + cp_ctx(ctx, 0x408c00 + (i<<12), 1); + gr_def(ctx, 0x408c00 + (i<<12), 0x0000ff0a); + cp_ctx(ctx, 0x408c08 + (i<<12), 1); + cp_ctx(ctx, 0x408e00 + (i<<12), 2); + gr_def(ctx, 0x408e00 + (i<<12), 0x0077f005); + if (dev_priv->chipset == 0x50) + gr_def(ctx, 0x408e04 + (i<<12), 0x00007fff); + else + gr_def(ctx, 0x408e04 + (i<<12), 0x003f7fff); + cp_ctx(ctx, 0x408e2c + (i<<12), 1); + if (dev_priv->chipset == 0x50) { + cp_ctx(ctx, 0x408e50 + (i<<12), 9); + gr_def(ctx, 0x408e54 + (i<<12), 0x000003ff); + gr_def(ctx, 0x408e58 + (i<<12), 0x00000003); + gr_def(ctx, 0x408e5c + (i<<12), 0x00000003); + gr_def(ctx, 0x408e60 + (i<<12), 0x000001ff); + gr_def(ctx, 0x408e64 + (i<<12), 0x0000001f); + gr_def(ctx, 0x408e68 + (i<<12), 0x0000000f); + gr_def(ctx, 0x408e6c + (i<<12), 
0x0000000f); + } else { + cp_ctx(ctx, 0x408e50 + (i<<12), 1); + cp_ctx(ctx, 0x408e70 + (i<<12), 1); + } + } + } +} + +static void +nv50_graph_construct_xfer1(struct nouveau_grctx *ctx) +{ + struct drm_nouveau_private *dev_priv = ctx->dev->dev_private; + int i, j; + int offset; + int m2mf_offset, a02_offset, a03_offset, a04_offset, a05_offset, a06_offset, a07_offset, a08_offset; + int a11_offset, a12_offset, a13_offset; + int a21_offset, a22_offset, a23_offset, a24_offset, a25_offset, a26_offset, a27_offset, a28_offset, a29_offset, a2a_offset; + int a3_size, a31_offset, a32_offset, a33_offset, a34_offset; + int a4_size, a41_offset, a42_offset, a43_offset, a44_offset, a45_offset, a46_offset, a47_offset, a48_offset; + int magic1, magic2, magic3; + uint32_t units = nv_rd32 (ctx->dev, 0x1540); + int b1_size; + switch (dev_priv->chipset) { + case 0x50: + b1_size = 0x2627; + break; + case 0x84: + case 0x86: + b1_size = 0x2916; + break; + case 0x92: + case 0x94: + case 0x96: + b1_size = 0x2dd2; + break; + case 0x98: + b1_size = 0x2912; + break; + } + if (dev_priv->chipset == 0x50) { + magic1 = 0x3ff; + magic2 = 0x00003e60; + } else { + magic1 = 0x7ff; + magic2 = 0x001ffe67; + } + + offset = (ctx->ctxvals_pos+0x3f)&~0x3f; + ctx->ctxvals_base = offset; + ctx->ctxvals_pos = offset + b1_size * 8; + ctx->ctxvals_pos = (ctx->ctxvals_pos+0x3f)&~0x3f; + cp_lsr (ctx, offset); + cp_out (ctx, CP_SET_XFER_POINTER); + cp_lsr (ctx, b1_size); + cp_out (ctx, CP_SEEK_MAGIC_UNK01); + cp_out (ctx, CP_XFER_MAGIC_UNK01); + cp_wait(ctx, XFER, BUSY); + + if (ctx->mode != NOUVEAU_GRCTX_VALS) + return; + + /* area 0 */ + + switch (dev_priv->chipset) { + case 0x50: + m2mf_offset = offset + 0x4c8; + a02_offset = offset + 0x2bb8; + a03_offset = offset + 0x10e20; + a04_offset = offset + 0x10e40; + a05_offset = offset + 0x12ca0; + a06_offset = a05_offset + 0x120; + a07_offset = a06_offset + 0x1a0; + a08_offset = a07_offset + 0x1c0; + break; + case 0x84: + case 0x86: + m2mf_offset = offset + 0x1c20; + 
a02_offset = offset + 0x4310; + a03_offset = offset + 0x12578; + a04_offset = offset + 0x125a0; + a05_offset = offset + 0x14400; + a06_offset = a05_offset + 0x128; + a07_offset = a06_offset + 0x1a8; + a08_offset = a07_offset + 0x1c8; + break; + case 0x92: + case 0x94: + case 0x96: + m2mf_offset = offset + 0x1c00; + a02_offset = offset + 0x68f0; + a03_offset = offset + 0x14b58; + a04_offset = offset + 0x14b80; + a05_offset = offset + 0x169e0; + a06_offset = a05_offset + 0x128; + a07_offset = a06_offset + 0x1a8; + a08_offset = a07_offset + 0x1c8; + break; + case 0x98: + m2mf_offset = offset + 0x1c00; + a02_offset = offset + 0x42f0; + a03_offset = offset + 0x12558; + a04_offset = offset + 0x12580; + a05_offset = offset + 0x143e0; + a06_offset = a05_offset + 0x128; + a07_offset = a06_offset + 0x1a8; + a08_offset = a07_offset + 0x1c8; + break; + } + + nv50_graph_construct_xfer_m2mf (ctx, m2mf_offset); + + nv_wo32(ctx->dev, ctx->data, a02_offset + 0x0, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a02_offset + 0x8, 0x00000004); + + nv_wo32(ctx->dev, ctx->data, a03_offset + 0x00, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a03_offset + 0x08, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a03_offset + 0x10, 0x08100c12); + if (dev_priv->chipset != 0x50) + nv_wo32(ctx->dev, ctx->data, a03_offset + 0x18, 0x00000003); + nv_wo32(ctx->dev, ctx->data, a04_offset + 0x00, 0x08100c12); + nv_wo32(ctx->dev, ctx->data, a04_offset + 0x10, 0x00080c14); + nv_wo32(ctx->dev, ctx->data, a04_offset + 0x18, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a04_offset + 0x20, 0x00080c14); + nv_wo32(ctx->dev, ctx->data, a04_offset + 0x38, 0x08100c12); + nv_wo32(ctx->dev, ctx->data, a04_offset + 0x40, 0x00000027); + nv_wo32(ctx->dev, ctx->data, a04_offset + 0x58, 0x00000001); + + + nv_wo32(ctx->dev, ctx->data, a05_offset + 0x008, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a05_offset + 0x0c0, 0x08100c12); + + nv_wo32(ctx->dev, ctx->data, a06_offset + 0x000, 0x04000000); + nv_wo32(ctx->dev, ctx->data, 
a06_offset + 0x008, 0x04000000); + nv_wo32(ctx->dev, ctx->data, a06_offset + 0x018, 0x00000080); + nv_wo32(ctx->dev, ctx->data, a06_offset + 0x038, 0x00000080); + nv_wo32(ctx->dev, ctx->data, a06_offset + 0x048, 0x0000003f); + nv_wo32(ctx->dev, ctx->data, a06_offset + 0x0a0, 0x00000002); + nv_wo32(ctx->dev, ctx->data, a06_offset + 0x0a8, 0x04000000); + nv_wo32(ctx->dev, ctx->data, a06_offset + 0x0b0, 0x04000000); + nv_wo32(ctx->dev, ctx->data, a06_offset + 0x0f8, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a06_offset + 0x118, 0x00000004); + + nv_wo32(ctx->dev, ctx->data, a07_offset + 0x000, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a07_offset + 0x008, 0x00001001); + for (i = 0; i < 0x20; i += 8) + nv_wo32(ctx->dev, ctx->data, a07_offset + 0x010 + i, 0x0000ffff); + for (i = 0; i < 0x80; i += 8) + nv_wo32(ctx->dev, ctx->data, a07_offset + 0x130 + i, 0x3f800000); + nv_wo32(ctx->dev, ctx->data, a07_offset + 0x1b0, 0x00000010); + + nv_wo32(ctx->dev, ctx->data, a08_offset + 0x000, 0x00000003); + + /* area 1 */ + a11_offset = offset + 0x1; + if (dev_priv->chipset == 0x50) + a12_offset = offset + 0xaf9; + else + a12_offset = offset + 0xb01; + switch (dev_priv->chipset) { + case 0x50: + a13_offset = offset + 0x5ba9; + break; + case 0x84: + a13_offset = offset + 0x5a71; + break; + case 0x86: + a13_offset = offset + 0x5731; + break; + case 0x92: + a13_offset = offset + 0x5bb1; + break; + case 0x94: + case 0x96: + a13_offset = offset + 0x5931; + break; + case 0x98: + a13_offset = offset + 0x5731; + break; + + } + nv_wo32(ctx->dev, ctx->data, a11_offset + 0x020, 0x0000000f); + nv_wo32(ctx->dev, ctx->data, a11_offset + 0x048, 0x00000020); + nv_wo32(ctx->dev, ctx->data, a11_offset + 0x0c0, 0x0000001a); + nv_wo32(ctx->dev, ctx->data, a11_offset + 0x130, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a11_offset + 0x138, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a11_offset + 0x148, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a11_offset + 0x150, 0x00000008); + nv_wo32(ctx->dev, 
ctx->data, a11_offset + 0x160, magic1); + nv_wo32(ctx->dev, ctx->data, a11_offset + 0x1c8, 0x0000000f); + + nv_wo32(ctx->dev, ctx->data, a12_offset + 0x0000, 0x0000000f); + nv_wo32(ctx->dev, ctx->data, a12_offset + 0x0010, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a12_offset + 0x02e8, 0x0000000f); + nv_wo32(ctx->dev, ctx->data, a12_offset + 0x4490, 0x0000000f); + if (dev_priv->chipset == 0x50) { + nv_wo32(ctx->dev, ctx->data, a12_offset + 0x4520, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a12_offset + 0x4528, 0x00000001); + } + nv_wo32(ctx->dev, ctx->data, a12_offset + 0x4538, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a12_offset + 0x4540, 0x00000100); + nv_wo32(ctx->dev, ctx->data, a12_offset + 0x4548, 0x00000100); + nv_wo32(ctx->dev, ctx->data, a12_offset + 0x4550, 0x00000011); + nv_wo32(ctx->dev, ctx->data, a12_offset + 0x4560, 0x00000008); + nv_wo32(ctx->dev, ctx->data, a12_offset + 0x4590, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a12_offset + 0x45a0, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a12_offset + 0x45a8, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a12_offset + 0x45b0, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a12_offset + 0x45b8, 0x000000cf); + nv_wo32(ctx->dev, ctx->data, a12_offset + 0x45c0, 0x00000002); + nv_wo32(ctx->dev, ctx->data, a12_offset + 0x45f8, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a12_offset + 0x4608, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a12_offset + 0x4610, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a12_offset + 0x4618, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a12_offset + 0x4640, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a12_offset + 0x4650, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a12_offset + 0x4658, 0x00000015); + nv_wo32(ctx->dev, ctx->data, a12_offset + 0x4678, 0x04444480); + nv_wo32(ctx->dev, ctx->data, a12_offset + 0x4858, 0x08100c12); + nv_wo32(ctx->dev, ctx->data, a12_offset + 0x4880, 0x00000100); + nv_wo32(ctx->dev, ctx->data, a12_offset + 0x4898, 0x00010001); + nv_wo32(ctx->dev, ctx->data, 
a12_offset + 0x48a8, 0x00010001); + nv_wo32(ctx->dev, ctx->data, a12_offset + 0x48b0, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a12_offset + 0x48b8, 0x00010001); + nv_wo32(ctx->dev, ctx->data, a12_offset + 0x48c0, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a12_offset + 0x48c8, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a12_offset + 0x48d0, 0x00000002); + + nv_wo32(ctx->dev, ctx->data, a13_offset + 0x00, 0x04e3bfdf); + nv_wo32(ctx->dev, ctx->data, a13_offset + 0x08, 0x04e3bfdf); + nv_wo32(ctx->dev, ctx->data, a13_offset + 0x30, 0x0fac6881); + nv_wo32(ctx->dev, ctx->data, a13_offset + 0x90, 0x04e3bfdf); + nv_wo32(ctx->dev, ctx->data, a13_offset + 0x98, 0x04e3bfdf); + + /* area 2 */ + switch (dev_priv->chipset) { + case 0x50: + case 0x92: + a21_offset = offset + 0x5402; + break; + case 0x84: + a21_offset = offset + 0x53f2; + break; + case 0x94: + case 0x96: + a21_offset = offset + 0x53e2; + break; + case 0x86: + case 0x98: + a21_offset = offset + 0x53d2; + break; + } + if (dev_priv->chipset == 0x50) { + a22_offset = a21_offset + 0x380; + a23_offset = a22_offset + 0x300; + a24_offset = a23_offset + 0x140; + a26_offset = a24_offset + 0x230; + a27_offset = a26_offset + 0x38; + a28_offset = a27_offset + 0x20; + a29_offset = a28_offset + 0x690; + } else { + a22_offset = a21_offset + 0x390; + a23_offset = a22_offset + 0x308; + a24_offset = a23_offset + 0x150; + a25_offset = a24_offset + 0x250; + a26_offset = a25_offset + 0x48; + a27_offset = a26_offset + 0x40; + a28_offset = a27_offset + 0x28; + a29_offset = a28_offset + 0x6a8; + } + switch (dev_priv->chipset) { + case 0x50: + case 0x86: + case 0x98: + a2a_offset = a29_offset + 0x2ab0; + break; + case 0x84: + case 0x92: + case 0x94: + case 0x96: + a2a_offset = a29_offset + 0x4ab0; + break; + } + + nv_wo32(ctx->dev, ctx->data, a21_offset + 0x000, 0x003fffff); + nv_wo32(ctx->dev, ctx->data, a21_offset + 0x018, 0x00001fff); + nv_wo32(ctx->dev, ctx->data, a21_offset + 0x090, 0x3f800000); + nv_wo32(ctx->dev, ctx->data, 
a21_offset + 0x0c8, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a21_offset + 0x0d0, 0x0000001a); + nv_wo32(ctx->dev, ctx->data, a21_offset + 0x0e8, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a21_offset + 0x180, 0x00ffff00); + nv_wo32(ctx->dev, ctx->data, a21_offset + 0x1b8, 0x0000000f); + nv_wo32(ctx->dev, ctx->data, a21_offset + 0x1f8, 0x0fac6881); + nv_wo32(ctx->dev, ctx->data, a21_offset + 0x200, 0x00000011); + nv_wo32(ctx->dev, ctx->data, a21_offset + 0x280, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a21_offset + 0x2a8, 0x00000002); + nv_wo32(ctx->dev, ctx->data, a21_offset + 0x2b0, 0x04000000); + nv_wo32(ctx->dev, ctx->data, a21_offset + 0x2b8, 0x04000000); + nv_wo32(ctx->dev, ctx->data, a21_offset + 0x2d8, 0x00000005); + nv_wo32(ctx->dev, ctx->data, a21_offset + 0x2e0, 0x00000052); + if (dev_priv->chipset != 0x50) + nv_wo32(ctx->dev, ctx->data, a21_offset + 0x308, 0x00000001); + + for (i = 0; i < 0x80; i += 8) + nv_wo32(ctx->dev, ctx->data, a22_offset + i, 0x3f800000); + nv_wo32(ctx->dev, ctx->data, a22_offset + 0x080, 0x00000010); + nv_wo32(ctx->dev, ctx->data, a22_offset + 0x1b8, 0x08100c12); + nv_wo32(ctx->dev, ctx->data, a22_offset + 0x1c0, 0x00000005); + nv_wo32(ctx->dev, ctx->data, a22_offset + 0x1d8, 0x00000001); + for (i = 0; i < 0x20; i += 8) + nv_wo32(ctx->dev, ctx->data, a22_offset + 0x1e8 + i, 0x0000ffff); + if (dev_priv->chipset != 0x50) + nv_wo32(ctx->dev, ctx->data, a22_offset + 0x208, 0x00000003); + + nv_wo32(ctx->dev, ctx->data, a23_offset + 0, 0x00ffff00); + nv_wo32(ctx->dev, ctx->data, a23_offset + 8, 0x0000001a); + if (dev_priv->chipset != 0x50) + nv_wo32(ctx->dev, ctx->data, a23_offset + 0x18, 0x00000003); + + nv_wo32(ctx->dev, ctx->data, a24_offset + 0x00, 0x00000102); + nv_wo32(ctx->dev, ctx->data, a24_offset + 0x10, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a24_offset + 0x18, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a24_offset + 0x20, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a24_offset + 0x28, 0x00000004); + nv_wo32(ctx->dev, 
ctx->data, a24_offset + 0x30, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a24_offset + 0x38, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a24_offset + 0x48, magic1); + nv_wo32(ctx->dev, ctx->data, a24_offset + 0x58, 0x00000102); + nv_wo32(ctx->dev, ctx->data, a24_offset + 0xa8, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a24_offset + 0xb0, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a24_offset + 0xb8, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a24_offset + 0xc0, 0x00000004); + + if (dev_priv->chipset != 0x50) { + nv_wo32(ctx->dev, ctx->data, a25_offset + 0x00, 0x00080c14); + nv_wo32(ctx->dev, ctx->data, a25_offset + 0x18, 0x00000804); + nv_wo32(ctx->dev, ctx->data, a25_offset + 0x28, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a25_offset + 0x30, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a25_offset + 0x38, 0x08100c12); + } + + nv_wo32(ctx->dev, ctx->data, a26_offset + 0x00, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a26_offset + 0x08, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a26_offset + 0x18, 0x00000010); + + nv_wo32(ctx->dev, ctx->data, a27_offset + 0x00, 0x00000804); + nv_wo32(ctx->dev, ctx->data, a27_offset + 0x08, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a27_offset + 0x10, 0x0000001a); + if (dev_priv->chipset != 0x50) + nv_wo32(ctx->dev, ctx->data, a27_offset + 0x18, 0x0000007f); + + nv_wo32(ctx->dev, ctx->data, a28_offset + 0x0000, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a28_offset + 0x0008, 0x00080c14); + nv_wo32(ctx->dev, ctx->data, a28_offset + 0x0018, 0x08100c12); + nv_wo32(ctx->dev, ctx->data, a28_offset + 0x0020, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a28_offset + 0x0028, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a28_offset + 0x0038, 0x00000010); + nv_wo32(ctx->dev, ctx->data, a28_offset + 0x0058, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a28_offset + 0x0060, 0x08100c12); + nv_wo32(ctx->dev, ctx->data, a28_offset + 0x0098, magic1); + nv_wo32(ctx->dev, ctx->data, a28_offset + 0x00a0, 0x00080c14); + nv_wo32(ctx->dev, ctx->data, 
a28_offset + 0x0268, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a28_offset + 0x0280, 0x00000010); + nv_wo32(ctx->dev, ctx->data, a28_offset + 0x0448, 0x00000088); + nv_wo32(ctx->dev, ctx->data, a28_offset + 0x0450, 0x00000088); + nv_wo32(ctx->dev, ctx->data, a28_offset + 0x0468, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a28_offset + 0x0520, 0x00000026); + nv_wo32(ctx->dev, ctx->data, a28_offset + 0x0538, 0x3f800000); + nv_wo32(ctx->dev, ctx->data, a28_offset + 0x0558, 0x0000001a); + nv_wo32(ctx->dev, ctx->data, a28_offset + 0x0560, 0x00000010); + + nv_wo32(ctx->dev, ctx->data, a29_offset + 0x00, 0x00000052); + nv_wo32(ctx->dev, ctx->data, a29_offset + 0x10, 0x00000026); + nv_wo32(ctx->dev, ctx->data, a29_offset + 0x20, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a29_offset + 0x28, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a29_offset + 0x38, 0x0000001a); + nv_wo32(ctx->dev, ctx->data, a29_offset + 0x50, 0x00ffff00); + nv_wo32(ctx->dev, ctx->data, a29_offset + 0x60, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a29_offset + 0x68, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a29_offset + 0x78, 0x00000080); + nv_wo32(ctx->dev, ctx->data, a29_offset + 0x80, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a29_offset + 0x88, 0x00080c14); + nv_wo32(ctx->dev, ctx->data, a29_offset + 0x98, magic1); + + nv_wo32(ctx->dev, ctx->data, a2a_offset + 0x000, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a2a_offset + 0x008, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a2a_offset + 0x018, 0x00000080); + nv_wo32(ctx->dev, ctx->data, a2a_offset + 0x020, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a2a_offset + 0x028, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a2a_offset + 0x038, 0x00000027); + nv_wo32(ctx->dev, ctx->data, a2a_offset + 0x048, 0x00000026); + for (i = 0; i < 0x80; i += 8) + nv_wo32(ctx->dev, ctx->data, a2a_offset + 0x068 + i, 0x04000000); + nv_wo32(ctx->dev, ctx->data, a2a_offset + 0x208, 0x04e3bfdf); + nv_wo32(ctx->dev, ctx->data, a2a_offset + 0x210, 0x04e3bfdf); + 
nv_wo32(ctx->dev, ctx->data, a2a_offset + 0x228, 0x0001fe21); + + /* area 3: per-ROP group state */ + for (i = 0; i < 8; i++) { + if (units & (1<<(i+16))) { + switch (dev_priv->chipset) { + case 0x50: + a3_size = 0x990; + a31_offset = offset + i*a3_size + 3; + a32_offset = 0; + a33_offset = offset + i*a3_size + 0x113; + a34_offset = offset + i*a3_size + 0x55b; + break; + case 0x84: + case 0x94: + case 0x96: + a3_size = 0x9a0; + a31_offset = offset + i*a3_size + 3; + a32_offset = 0; + a33_offset = offset + i*a3_size + 0x113; + a34_offset = offset + i*a3_size + 0x56b; + break; + case 0x86: + case 0x92: + case 0x98: + a3_size = 0x9e0; + a31_offset = offset + i*a3_size + 3; + a32_offset = offset + i*a3_size + 0x103; + a33_offset = offset + i*a3_size + 0x153; + a34_offset = offset + i*a3_size + 0x5ab; + break; + } + nv_wo32(ctx->dev, ctx->data, a31_offset + 0x40, 0x00000002); + nv_wo32(ctx->dev, ctx->data, a31_offset + 0x50, magic2); + nv_wo32(ctx->dev, ctx->data, a31_offset + 0xb8, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a31_offset + 0xc0, 0x00000010); + nv_wo32(ctx->dev, ctx->data, a31_offset + 0xd8, 0x00000001); + + if (a32_offset) { + nv_wo32(ctx->dev, ctx->data, a32_offset + 0x00, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a32_offset + 0x08, 0x00000400); + nv_wo32(ctx->dev, ctx->data, a32_offset + 0x10, 0x00000300); + nv_wo32(ctx->dev, ctx->data, a32_offset + 0x18, 0x00001001); + nv_wo32(ctx->dev, ctx->data, a32_offset + 0x20, 0x00000015); + } + + nv_wo32(ctx->dev, ctx->data, a33_offset + 0x000, 0x00000002); + nv_wo32(ctx->dev, ctx->data, a33_offset + 0x048, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a33_offset + 0x050, 0x00000010); + nv_wo32(ctx->dev, ctx->data, a33_offset + 0x060, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a33_offset + 0x100, 0x00000010); + for (j = 0; j < 0x80; j += 8) + nv_wo32(ctx->dev, ctx->data, a33_offset + 0x188 + j, 0x3f800000); + nv_wo32(ctx->dev, ctx->data, a33_offset + 0x2d0, 0x00000010); + nv_wo32(ctx->dev, ctx->data, 
a33_offset + 0x2e0, 0x0000003f); + nv_wo32(ctx->dev, ctx->data, a33_offset + 0x318, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a33_offset + 0x328, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a33_offset + 0x338, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a33_offset + 0x3a0, 0x00000011); + nv_wo32(ctx->dev, ctx->data, a33_offset + 0x3e0, 0x0000000f); + nv_wo32(ctx->dev, ctx->data, a33_offset + 0x420, 0x00000011); + + nv_wo32(ctx->dev, ctx->data, a34_offset + 0x000, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a34_offset + 0x008, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a34_offset + 0x010, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a34_offset + 0x018, 0x00000002); + nv_wo32(ctx->dev, ctx->data, a34_offset + 0x020, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a34_offset + 0x028, 0x00000002); + nv_wo32(ctx->dev, ctx->data, a34_offset + 0x030, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a34_offset + 0x040, magic2); + nv_wo32(ctx->dev, ctx->data, a34_offset + 0x050, 0x0fac6881); + nv_wo32(ctx->dev, ctx->data, a34_offset + 0x100, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a34_offset + 0x108, 0x00000002); + nv_wo32(ctx->dev, ctx->data, a34_offset + 0x110, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a34_offset + 0x118, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a34_offset + 0x120, 0x00000002); + nv_wo32(ctx->dev, ctx->data, a34_offset + 0x128, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a34_offset + 0x130, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a34_offset + 0x150, 0x00000011); + nv_wo32(ctx->dev, ctx->data, a34_offset + 0x158, 0x00000001); + } + } + + /* areas 4-7: per-TP state */ + for (i = 0; i < 8; i++) { + if (units & (1<<i)) { + switch (dev_priv->chipset) { + case 0x50: + a4_size = 0x60d8; + magic3 = 0x1000; + a41_offset = offset + (i&1)*a4_size + 4 + (i>>1); + a42_offset = a41_offset + 0x128; + a43_offset = a42_offset + 0x158; + a44_offset = a43_offset + 0x5178; + a45_offset = a44_offset + 0x488; + a46_offset = a45_offset + 0x6b0; + a47_offset = a46_offset + 
0x28; + a48_offset = a47_offset + 0xb0; + break; + case 0x84: + case 0x92: + a4_size = 0x6100; + magic3 = 0; + a41_offset = offset + (i&1)*a4_size + 4 + (i>>1); + a42_offset = a41_offset + 0x120; + a43_offset = a42_offset + 0x150; + a44_offset = a43_offset + 0x5178; + a45_offset = a44_offset + 0x498; + a46_offset = a45_offset + 0x6b8; + a47_offset = a46_offset + 0x30; + a48_offset = a47_offset + 0xc8; + break; + case 0x94: + case 0x96: + a4_size = 0x9100; + magic3 = 0; + a41_offset = offset + (i&1)*a4_size + 4 + (i>>1); + a42_offset = a41_offset + 0x120; + a43_offset = a42_offset + 0x150; + a44_offset = a43_offset + 0x8178; + a45_offset = a44_offset + 0x498; + a46_offset = a45_offset + 0x6b8; + a47_offset = a46_offset + 0x30; + a48_offset = a47_offset + 0xc8; + break; + case 0x86: + case 0x98: + a4_size = 0x6110; + magic3 = 0x1e00; + a41_offset = offset + (i&1)*a4_size + 4 + (i>>1); + a42_offset = a41_offset + 0x128; + a43_offset = a42_offset + 0x158; + a44_offset = a43_offset + 0x5178; + a45_offset = a44_offset + 0x498; + a46_offset = a45_offset + 0x6b8; + a47_offset = a46_offset + 0x30; + a48_offset = a47_offset + 0xc8; + break; + } + nv_wo32(ctx->dev, ctx->data, a41_offset + 0x008, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a41_offset + 0x0b8, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a41_offset + 0x0c0, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a41_offset + 0x0c8, 0x00608080); + nv_wo32(ctx->dev, ctx->data, a41_offset + 0x0f0, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a41_offset + 0x108, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a41_offset + 0x110, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a41_offset + 0x118, 0x00000080); + if (magic3) + nv_wo32(ctx->dev, ctx->data, a41_offset + 0x120, magic3); + + nv_wo32(ctx->dev, ctx->data, a42_offset + 0x000, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a42_offset + 0x128, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a42_offset + 0x130, 0x00000080); + nv_wo32(ctx->dev, ctx->data, a42_offset + 0x138, 0x00000004); + 
nv_wo32(ctx->dev, ctx->data, a42_offset + 0x140, 0x03020100); + nv_wo32(ctx->dev, ctx->data, a42_offset + 0x148, 0x00000003); + if (magic3) + nv_wo32(ctx->dev, ctx->data, a42_offset + 0x150, magic3); + + nv_wo32(ctx->dev, ctx->data, a43_offset + 0x00, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a43_offset + 0x28, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a43_offset + 0x30, 0x00000003); + nv_wo32(ctx->dev, ctx->data, a43_offset + 0x50, 0x00000004); + + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x000, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x008, 0x00000003); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x098, 0x0000000f); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x0f8, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x100, 0x0000ffff); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x108, 0x0000ffff); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x110, 0x0000ffff); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x118, 0x0000ffff); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x160, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x180, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x1b0, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x218, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x220, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x228, 0x00000002); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x230, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x238, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x240, 0x00000002); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x248, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x258, 0x00000011); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x298, 0x0fac6881); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x2b0, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x2d0, 0x00000011); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x2d8, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x2e8, 0x000000cf); + 
nv_wo32(ctx->dev, ctx->data, a44_offset + 0x2f0, 0x000000cf); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x2f8, 0x000000cf); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x350, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x358, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x360, 0x00000002); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x368, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x370, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x378, 0x00000002); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x380, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x390, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x398, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x3a0, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x3a8, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x3b0, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x3b8, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x3c0, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x3c8, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x3d0, 0x00000011); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x410, 0x0fac6881); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x418, 0x0000000f); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x458, magic2); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x470, 0x00000011); + nv_wo32(ctx->dev, ctx->data, a44_offset + 0x478, 0x00000001); + + nv_wo32(ctx->dev, ctx->data, a45_offset + 0x000, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a45_offset + 0x030, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a45_offset + 0x058, 0x00000011); + nv_wo32(ctx->dev, ctx->data, a45_offset + 0x098, 0x0fac6881); + nv_wo32(ctx->dev, ctx->data, a45_offset + 0x0b8, 0x00000011); + nv_wo32(ctx->dev, ctx->data, a45_offset + 0x0c0, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a45_offset + 0x0d0, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a45_offset + 0x0e0, 0x00000001); + nv_wo32(ctx->dev, 
ctx->data, a45_offset + 0x0f0, magic1); + nv_wo32(ctx->dev, ctx->data, a45_offset + 0x100, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a45_offset + 0x110, 0x00000001); + for (j = 0; j < 0x40; j += 8) + nv_wo32(ctx->dev, ctx->data, a45_offset + 0x268 + j, 0x00000008); + nv_wo32(ctx->dev, ctx->data, a45_offset + 0x2a8, 0x00000011); + nv_wo32(ctx->dev, ctx->data, a45_offset + 0x2e8, 0x0fac6881); + for (j = 0; j < 0x40; j += 8) + nv_wo32(ctx->dev, ctx->data, a45_offset + 0x2f0 + j, 0x00000400); + for (j = 0; j < 0x40; j += 8) + nv_wo32(ctx->dev, ctx->data, a45_offset + 0x330 + j, 0x00000300); + nv_wo32(ctx->dev, ctx->data, a45_offset + 0x370, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a45_offset + 0x378, 0x0000000f); + nv_wo32(ctx->dev, ctx->data, a45_offset + 0x3b8, 0x00000020); + nv_wo32(ctx->dev, ctx->data, a45_offset + 0x3c0, 0x00000011); + nv_wo32(ctx->dev, ctx->data, a45_offset + 0x3c8, 0x00000100); + nv_wo32(ctx->dev, ctx->data, a45_offset + 0x3d8, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a45_offset + 0x3f0, 0x00000040); + nv_wo32(ctx->dev, ctx->data, a45_offset + 0x3f8, 0x00000100); + nv_wo32(ctx->dev, ctx->data, a45_offset + 0x408, 0x00000003); + nv_wo32(ctx->dev, ctx->data, a45_offset + 0x430, magic2); + nv_wo32(ctx->dev, ctx->data, a45_offset + 0x450, 0x00000002); + nv_wo32(ctx->dev, ctx->data, a45_offset + 0x458, 0x0fac6881); + nv_wo32(ctx->dev, ctx->data, a45_offset + 0x4a8, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a45_offset + 0x4d0, 0x00000004); + nv_wo32(ctx->dev, ctx->data, a45_offset + 0x4e0, 0x00000001); + nv_wo32(ctx->dev, ctx->data, a45_offset + 0x4e8, 0x00000400); + nv_wo32(ctx->dev, ctx->data, a45_offset + 0x4f0, 0x00000300); + nv_wo32(ctx->dev, ctx->data, a45_offset + 0x4f8, 0x00001001); + nv_wo32(ctx->dev, ctx->data, a45_offset + 0x518, 0x00000011); + nv_wo32(ctx->dev, ctx->data, a45_offset + 0x558, 0x0fac6881); + nv_wo32(ctx->dev, ctx->data, a45_offset + 0x560, 0x0000000f); + nv_wo32(ctx->dev, ctx->data, a45_offset + 0x620, magic2); + 
nv_wo32(ctx->dev, ctx->data, a45_offset + 0x640, 0x00000011);
+			nv_wo32(ctx->dev, ctx->data, a45_offset + 0x658, 0x00000004);
+			nv_wo32(ctx->dev, ctx->data, a45_offset + 0x668, 0x00000001);
+			nv_wo32(ctx->dev, ctx->data, a45_offset + 0x670, 0x00000001);
+			nv_wo32(ctx->dev, ctx->data, a45_offset + 0x690, 0x00000001);
+
+			nv_wo32(ctx->dev, ctx->data, a46_offset + 0x00, 0x00000001);
+			nv_wo32(ctx->dev, ctx->data, a46_offset + 0x10, 0x00000001);
+
+			nv_wo32(ctx->dev, ctx->data, a47_offset + 0x00, 0x2a712488);
+			nv_wo32(ctx->dev, ctx->data, a47_offset + 0x10, 0x4085c000);
+			nv_wo32(ctx->dev, ctx->data, a47_offset + 0x18, 0x00000040);
+			nv_wo32(ctx->dev, ctx->data, a47_offset + 0x20, 0x00000100);
+			nv_wo32(ctx->dev, ctx->data, a47_offset + 0x28, 0x00010100);
+			nv_wo32(ctx->dev, ctx->data, a47_offset + 0x30, 0x02800000);
+
+			nv_wo32(ctx->dev, ctx->data, a48_offset + 0x00, 0x04e3bfdf);
+			nv_wo32(ctx->dev, ctx->data, a48_offset + 0x08, 0x04e3bfdf);
+			nv_wo32(ctx->dev, ctx->data, a48_offset + 0x10, 0x00000001);
+			nv_wo32(ctx->dev, ctx->data, a48_offset + 0x20, 0x00ffff00);
+			nv_wo32(ctx->dev, ctx->data, a48_offset + 0x28, 0x00000001);
+			nv_wo32(ctx->dev, ctx->data, a48_offset + 0x40, 0x00ffff00);
+			nv_wo32(ctx->dev, ctx->data, a48_offset + 0x88, 0x00000001);
+			nv_wo32(ctx->dev, ctx->data, a48_offset + 0x98, 0x00000001);
+			nv_wo32(ctx->dev, ctx->data, a48_offset + 0xa0, 0x30201000);
+			nv_wo32(ctx->dev, ctx->data, a48_offset + 0xa8, 0x70605040);
+			nv_wo32(ctx->dev, ctx->data, a48_offset + 0xb0, 0xb8a89888);
+			nv_wo32(ctx->dev, ctx->data, a48_offset + 0xb8, 0xf8e8d8c8);
+			nv_wo32(ctx->dev, ctx->data, a48_offset + 0xc8, 0x0000001a);
+		}
+	}
+
+}
+
+/*
+ * nv50_graph_construct_xfer2 - emit the MAGIC_UNK02 transfer and, in
+ * NOUVEAU_GRCTX_VALS mode, fill in its initial context values.
+ *
+ * @ctx: generator state; ctx->ctxvals_pos is advanced past the area
+ *       reserved here.
+ *
+ * The area starts at ctx->ctxvals_pos rounded up to a multiple of 0x40 and
+ * reserves b2_size * 8 entries (rounded up to 0x40 again), where b2_size is
+ * chipset specific.  The ctxprog opcodes for the transfer are always
+ * emitted; the nv_wo32() writes only happen when ctx->mode is
+ * NOUVEAU_GRCTX_VALS.
+ *
+ * Values are written once per unit whose bit (0-7) is set in register
+ * 0x1540.  Unit i starts at offset + i and every relative offset used below
+ * is a multiple of 8, so the units are interleaved with a stride of 8.
+ *
+ * Chipsets other than 0x50/0x84/0x86/0x92/0x94/0x96/0x98 have no known
+ * layout: the function then returns without touching @ctx.
+ * NOTE(review): callers are presumably expected to reject such chipsets
+ * before generating a ctxprog at all - confirm against nv50_graph.c.
+ */
+static void
+nv50_graph_construct_xfer2(struct nouveau_grctx *ctx)
+{
+	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+	int i, j;
+	uint32_t offset, b2_offset;
+	int mp1offset, mp2offset, mp3offset, mpcnt, mpsz, mainoffset, lastoffset;
+	uint32_t units = nv_rd32 (ctx->dev, 0x1540);
+	int b2_size;
+
+	/* size of the transfer, as passed to cp_lsr() below */
+	switch (dev_priv->chipset) {
+	case 0x98:
+		b2_size = 0x425;
+		break;
+	case 0x84:
+	case 0x92:
+		b2_size = 0x43e;
+		break;
+	case 0x94:
+	case 0x96:
+		b2_size = 0x43f;
+		break;
+	case 0x50:
+		b2_size = 0x440;
+		break;
+	case 0x86:
+		b2_size = 0x442;
+		break;
+	default:
+		/*
+		 * Unknown layout.  Bail out before emitting anything rather
+		 * than computing ctxvals_pos from an uninitialised b2_size.
+		 */
+		return;
+	}
+
+	offset = (ctx->ctxvals_pos+0x3f)&~0x3f;
+	ctx->ctxvals_pos = offset + b2_size * 8;
+	ctx->ctxvals_pos = (ctx->ctxvals_pos+0x3f)&~0x3f;
+	cp_lsr (ctx, offset);
+	cp_out (ctx, CP_SET_XFER_POINTER);
+	cp_lsr (ctx, b2_size);
+	cp_out (ctx, CP_SEEK_MAGIC_UNK02);
+	cp_out (ctx, CP_XFER_MAGIC_UNK02);
+	cp_wait(ctx, XFER, BUSY);
+
+	/* the initial values below are only wanted when building ctxvals */
+	if (ctx->mode != NOUVEAU_GRCTX_VALS)
+		return;
+
+	nv_wo32(ctx->dev, ctx->data, offset + 0, 0x08100c12);
+
+	for (i = 0; i < 8; i++) {
+		if (!(units & (1<<i)))
+			continue;
+
+		/*
+		 * Unit 0 starts one stride (8) further in; offset + 0 itself
+		 * already holds the word written just above.
+		 */
+		b2_offset = offset + i;
+		if (!i)
+			b2_offset += 8;
+
+		/* per-chipset layout of one MP sub-block */
+		switch (dev_priv->chipset) {
+		case 0x50:
+			mp1offset = b2_offset;
+			mp2offset = mp1offset + 0x38;
+			mp3offset = mp2offset + 0x28;
+			mpsz = 0xd0;
+			break;
+		case 0x84:
+		case 0x92:
+		case 0x94:
+		case 0x96:
+			mp1offset = b2_offset;
+			mp2offset = mp1offset + 0x38;
+			mp3offset = mp2offset + 0x18;
+			mpsz = 0xc0;
+			break;
+		case 0x86:
+		case 0x98:
+			mp1offset = b2_offset;
+			mp2offset = mp1offset + 0x48;
+			mp3offset = mp2offset + 0x18;
+			mpsz = 0xd0;
+			break;
+		default:
+			/* not reachable: unknown chipsets returned above */
+			continue;
+		}
+
+		/* 0x98 gets a single MP sub-block per unit, the others two */
+		mpcnt = (dev_priv->chipset == 0x98) ? 1 : 2;
+
+		for (j = 0; j < mpcnt; j++) {
+			nv_wo32(ctx->dev, ctx->data, mp1offset + j * mpsz + 0x08, 0x00000080);
+			nv_wo32(ctx->dev, ctx->data, mp1offset + j * mpsz + 0x10, 0x80007004);
+			nv_wo32(ctx->dev, ctx->data, mp1offset + j * mpsz + 0x18, 0x04000400);
+			nv_wo32(ctx->dev, ctx->data, mp1offset + j * mpsz + 0x20, 0x00001000);
+			if (dev_priv->chipset == 0x86 || dev_priv->chipset == 0x98) {
+				nv_wo32(ctx->dev, ctx->data, mp1offset + j * mpsz + 0x38, 0x00000e00);
+				nv_wo32(ctx->dev, ctx->data, mp1offset + j * mpsz + 0x40, 0x00001e00);
+			}
+			nv_wo32(ctx->dev, ctx->data, mp2offset + j * mpsz + 0x00, 0x00000001);
+			if (dev_priv->chipset == 0x50) {
+				nv_wo32(ctx->dev, ctx->data, mp2offset + j * mpsz + 0x18, 0x00001000);
+				nv_wo32(ctx->dev, ctx->data, mp2offset + j * mpsz + 0x20, 0x00001000);
+			}
+			nv_wo32(ctx->dev, ctx->data, mp3offset + j * mpsz + 0x00, 0x00000001);
+			nv_wo32(ctx->dev, ctx->data, mp3offset + j * mpsz + 0x10, 0x00000004);
+			nv_wo32(ctx->dev, ctx->data, mp3offset + j * mpsz + 0x18, 0x00000002);
+		}
+
+		/* the rest of the unit's state follows the MP sub-blocks */
+		mainoffset = b2_offset + mpsz * mpcnt;
+
+		nv_wo32(ctx->dev, ctx->data, mainoffset + 0x000, 0x08100c12);
+		nv_wo32(ctx->dev, ctx->data, mainoffset + 0x038, 0x0000ffff);
+		nv_wo32(ctx->dev, ctx->data, mainoffset + 0x040, 0x0000ffff);
+		nv_wo32(ctx->dev, ctx->data, mainoffset + 0x048, 0x0000ffff);
+		nv_wo32(ctx->dev, ctx->data, mainoffset + 0x050, 0x0000ffff);
+		nv_wo32(ctx->dev, ctx->data, mainoffset + 0x058, 0x00000001);
+		nv_wo32(ctx->dev, ctx->data, mainoffset + 0x060, 0x00010001);
+		nv_wo32(ctx->dev, ctx->data, mainoffset + 0x068, 0x00010001);
+		nv_wo32(ctx->dev, ctx->data, mainoffset + 0x070, 0x00000001);
+		nv_wo32(ctx->dev, ctx->data, mainoffset + 0x080, 0x0001fe21);
+		nv_wo32(ctx->dev, ctx->data, mainoffset + 0x0b0, 0x08100c12);
+		nv_wo32(ctx->dev, ctx->data, mainoffset + 0x0b8, 0x00000004);
+		nv_wo32(ctx->dev, ctx->data, mainoffset + 0x0c8, 0x00000002);
+		nv_wo32(ctx->dev, ctx->data, mainoffset + 0x0d0, 0x00000011);
+		nv_wo32(ctx->dev, ctx->data, mainoffset + 0x118, 0x0fac6881);
+		nv_wo32(ctx->dev, ctx->data, mainoffset + 0x140, 0x00000004);
+		nv_wo32(ctx->dev, ctx->data, mainoffset + 0x190, 0x00000002);
+		nv_wo32(ctx->dev, ctx->data, mainoffset + 0x198, 0x00000001);
+		nv_wo32(ctx->dev, ctx->data, mainoffset + 0x1a0, 0x00000001);
+		nv_wo32(ctx->dev, ctx->data, mainoffset + 0x1a8, 0x00000002);
+		nv_wo32(ctx->dev, ctx->data, mainoffset + 0x1b0, 0x00000001);
+		nv_wo32(ctx->dev, ctx->data, mainoffset + 0x1b8, 0x00000001);
+		nv_wo32(ctx->dev, ctx->data, mainoffset + 0x1c0, 0x00000001);
+		nv_wo32(ctx->dev, ctx->data, mainoffset + 0x1d0, 0x00000004);
+
+		/* position of the two trailing words, per chipset */
+		if (dev_priv->chipset == 0x50)
+			lastoffset = mainoffset + 0x1ed8;
+		else if (dev_priv->chipset < 0x94)
+			lastoffset = mainoffset + 0x1ee8;
+		else if (dev_priv->chipset < 0x98)
+			lastoffset = mainoffset + 0x1ef0;
+		else
+			lastoffset = mainoffset + 0x1ed0;
+
+		nv_wo32(ctx->dev, ctx->data, lastoffset + 0x00, 0x00000011);
+		nv_wo32(ctx->dev, ctx->data, lastoffset + 0x10, 0x00000001);
+	}
+}
+
+/*
+ * nv50_grctx_init - generate the PGRAPH context-switch program.
+ *
+ * @ctx: generator state that the cp_*() helpers and the
+ *       nv50_graph_construct_*() functions append to.
+ *
+ * The emitted program is laid out as follows (labels are the cp_name()
+ * targets used below):
+ *
+ *   entry             - branch to cp_setup_save if an AUTO_SAVE or
+ *                       USER_SAVE is pending
+ *   cp_check_load     - branch to the load setup if a load is pending,
+ *                       otherwise to cp_exit
+ *   cp_setup_auto_load/cp_setup_load, cp_setup_save
+ *                     - per-direction setup; both sides set
+ *                       SWAP_DIRECTION and end up at cp_swap_state
+ *                       (the load side may branch straight to
+ *                       cp_prepare_exit on UNK0B)
+ *   cp_swap_state     - the state transfer itself: register 0x400828
+ *                       first, then the general, xfer1 and xfer2 parts
+ *   after the swap    - a save branches back to cp_check_load; a load
+ *                       continues with a 4-entry MAGIC_UNK01 transfer at
+ *                       ctx->ctxvals_base
+ *   cp_prepare_exit, cp_exit
+ *                     - clear the flags set above and end the program
+ *
+ * The order of the calls below is the order of the generated opcodes, so
+ * it must not be rearranged.  On return ctx->ctxvals_pos has additionally
+ * been advanced by 0x400.
+ */
+void
+nv50_grctx_init(struct nouveau_grctx *ctx)
+{
+	/* decide whether we're loading/unloading the context */
+	cp_bra (ctx, AUTO_SAVE, PENDING, cp_setup_save);
+	cp_bra (ctx, USER_SAVE, PENDING, cp_setup_save);
+
+	cp_name(ctx, cp_check_load);
+	cp_bra (ctx, AUTO_LOAD, PENDING, cp_setup_auto_load);
+	cp_bra (ctx, USER_LOAD, PENDING, cp_setup_load);
+	cp_bra (ctx, ALWAYS, TRUE, cp_exit);
+
+	/* setup for context load */
+	cp_name(ctx, cp_setup_auto_load);
+	cp_out (ctx, CP_DISABLE1);
+	cp_out (ctx, CP_DISABLE2);
+	cp_out (ctx, CP_ENABLE);
+	cp_out (ctx, CP_NEXT_TO_SWAP);
+	cp_set (ctx, UNK01, SET);
+	cp_name(ctx, cp_setup_load);
+	cp_out (ctx, CP_NEWCTX);
+	cp_wait(ctx, NEWCTX, BUSY);
+	cp_set (ctx, UNK1D, CLEAR);
+	cp_set (ctx, SWAP_DIRECTION, LOAD);
+	cp_bra (ctx, UNK0B, SET, cp_prepare_exit);
+	cp_bra (ctx, ALWAYS, TRUE, cp_swap_state);
+
+	/* setup for context save */
+	cp_name(ctx, cp_setup_save);
+	cp_set (ctx, UNK1D, SET);
+	cp_wait(ctx, UNK40, CLEAR);
+	cp_set (ctx, UNK01, SET);
+	cp_set (ctx, SWAP_DIRECTION, SAVE);
+
+	/* general PGRAPH state */
+	cp_name(ctx, cp_swap_state);
+	cp_set (ctx, UNK03, SET);
+	cp_pos (ctx, 0x0011c/4);
+	cp_ctx (ctx, 0x400828, 1); /* needed. otherwise, flickering happens. */
+	cp_pos (ctx, 0x00100/4);
+	nv50_graph_construct_general(ctx);
+	nv50_graph_construct_xfer1(ctx);
+	nv50_graph_construct_xfer2(ctx);
+
+	cp_bra (ctx, SWAP_DIRECTION, SAVE, cp_check_load);
+
+	cp_set (ctx, UNK20, SET);
+	cp_set (ctx, SWAP_DIRECTION, SAVE); /* no idea why this is needed, but fixes at least one lockup. */
+	cp_lsr (ctx, ctx->ctxvals_base);
+	cp_out (ctx, CP_SET_XFER_POINTER);
+	cp_lsr (ctx, 4);
+	cp_out (ctx, CP_SEEK_MAGIC_UNK01);
+	cp_out (ctx, CP_XFER_MAGIC_UNK01);
+	cp_wait(ctx, XFER, BUSY);
+
+	/* pre-exit state updates */
+	cp_name(ctx, cp_prepare_exit);
+	cp_set (ctx, UNK01, CLEAR);
+	cp_set (ctx, UNK03, CLEAR);
+	cp_set (ctx, UNK1D, CLEAR);
+
+	cp_bra (ctx, USER_SAVE, PENDING, cp_exit);
+	cp_out (ctx, CP_NEXT_TO_CURRENT);
+
+	cp_name(ctx, cp_exit);
+	cp_set (ctx, USER_SAVE, NOT_PENDING);
+	cp_set (ctx, USER_LOAD, NOT_PENDING);
+	cp_out (ctx, CP_END);
+	ctx->ctxvals_pos += 0x400; /* padding... no idea why you need it */
+}