aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c')
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c423
1 files changed, 313 insertions, 110 deletions
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c
index 881015080d83..e813a3f8ea93 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c
@@ -850,12 +850,17 @@ gf100_grctx_init_gcc_0[] = {
};
const struct gf100_gr_pack
-gf100_grctx_pack_gpc[] = {
+gf100_grctx_pack_gpc_0[] = {
{ gf100_grctx_init_gpc_unk_0 },
{ gf100_grctx_init_prop_0 },
{ gf100_grctx_init_gpc_unk_1 },
{ gf100_grctx_init_setup_0 },
{ gf100_grctx_init_zcull_0 },
+ {}
+};
+
+const struct gf100_gr_pack
+gf100_grctx_pack_gpc_1[] = {
{ gf100_grctx_init_crstr_0 },
{ gf100_grctx_init_gpm_0 },
{ gf100_grctx_init_gcc_0 },
@@ -1025,6 +1030,13 @@ gf100_grctx_mmio_item(struct gf100_grctx *info, u32 addr, u32 data,
}
void
+gf100_grctx_generate_r419cb8(struct gf100_gr *gr)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+ nvkm_mask(device, 0x419cb8, 0x00007c00, 0x00000000);
+}
+
+void
gf100_grctx_generate_bundle(struct gf100_grctx *info)
{
const struct gf100_grctx_func *grctx = info->gr->func->grctx;
@@ -1080,89 +1092,38 @@ gf100_grctx_generate_unkn(struct gf100_gr *gr)
}
void
-gf100_grctx_generate_tpcid(struct gf100_gr *gr)
-{
- struct nvkm_device *device = gr->base.engine.subdev.device;
- int gpc, tpc, id;
-
- for (tpc = 0, id = 0; tpc < 4; tpc++) {
- for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
- if (tpc < gr->tpc_nr[gpc]) {
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x698), id);
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x4e8), id);
- nvkm_wr32(device, GPC_UNIT(gpc, 0x0c10 + tpc * 4), id);
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), id);
- id++;
- }
-
- nvkm_wr32(device, GPC_UNIT(gpc, 0x0c08), gr->tpc_nr[gpc]);
- nvkm_wr32(device, GPC_UNIT(gpc, 0x0c8c), gr->tpc_nr[gpc]);
- }
- }
-}
-
-void
-gf100_grctx_generate_r406028(struct gf100_gr *gr)
-{
- struct nvkm_device *device = gr->base.engine.subdev.device;
- u32 tmp[GPC_MAX / 8] = {}, i = 0;
- for (i = 0; i < gr->gpc_nr; i++)
- tmp[i / 8] |= gr->tpc_nr[i] << ((i % 8) * 4);
- for (i = 0; i < 4; i++) {
- nvkm_wr32(device, 0x406028 + (i * 4), tmp[i]);
- nvkm_wr32(device, 0x405870 + (i * 4), tmp[i]);
- }
-}
-
-void
gf100_grctx_generate_r4060a8(struct gf100_gr *gr)
{
struct nvkm_device *device = gr->base.engine.subdev.device;
- u8 tpcnr[GPC_MAX], data[TPC_MAX];
- int gpc, tpc, i;
-
- memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
- memset(data, 0x1f, sizeof(data));
-
- gpc = -1;
- for (tpc = 0; tpc < gr->tpc_total; tpc++) {
- do {
- gpc = (gpc + 1) % gr->gpc_nr;
- } while (!tpcnr[gpc]);
- tpcnr[gpc]--;
- data[tpc] = gpc;
+ const u8 gpcmax = nvkm_rd32(device, 0x022430);
+ const u8 tpcmax = nvkm_rd32(device, 0x022434) * gpcmax;
+ int i, j, sm = 0;
+ u32 data;
+
+ for (i = 0; i < DIV_ROUND_UP(tpcmax, 4); i++) {
+ for (data = 0, j = 0; j < 4; j++) {
+ if (sm < gr->sm_nr)
+ data |= gr->sm[sm++].gpc << (j * 8);
+ else
+ data |= 0x1f << (j * 8);
+ }
+ nvkm_wr32(device, 0x4060a8 + (i * 4), data);
}
-
- for (i = 0; i < 4; i++)
- nvkm_wr32(device, 0x4060a8 + (i * 4), ((u32 *)data)[i]);
}
void
-gf100_grctx_generate_r418bb8(struct gf100_gr *gr)
+gf100_grctx_generate_rop_mapping(struct gf100_gr *gr)
{
struct nvkm_device *device = gr->base.engine.subdev.device;
u32 data[6] = {}, data2[2] = {};
- u8 tpcnr[GPC_MAX];
u8 shift, ntpcv;
- int gpc, tpc, i;
-
- /* calculate first set of magics */
- memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
-
- gpc = -1;
- for (tpc = 0; tpc < gr->tpc_total; tpc++) {
- do {
- gpc = (gpc + 1) % gr->gpc_nr;
- } while (!tpcnr[gpc]);
- tpcnr[gpc]--;
-
- data[tpc / 6] |= gpc << ((tpc % 6) * 5);
- }
+ int i;
- for (; tpc < 32; tpc++)
- data[tpc / 6] |= 7 << ((tpc % 6) * 5);
+ /* Pack tile map into register format. */
+ for (i = 0; i < 32; i++)
+ data[i / 6] |= (gr->tile[i] & 0x07) << ((i % 6) * 5);
- /* and the second... */
+ /* Magic. */
shift = 0;
ntpcv = gr->tpc_total;
while (!(ntpcv & (1 << 4))) {
@@ -1197,40 +1158,214 @@ gf100_grctx_generate_r418bb8(struct gf100_gr *gr)
}
void
-gf100_grctx_generate_r406800(struct gf100_gr *gr)
+gf100_grctx_generate_max_ways_evict(struct gf100_gr *gr)
{
struct nvkm_device *device = gr->base.engine.subdev.device;
- u64 tpc_mask = 0, tpc_set = 0;
- u8 tpcnr[GPC_MAX];
- int gpc, tpc;
- int i, a, b;
-
- memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
- for (gpc = 0; gpc < gr->gpc_nr; gpc++)
- tpc_mask |= ((1ULL << gr->tpc_nr[gpc]) - 1) << (gpc * 8);
-
- for (i = 0, gpc = -1, b = -1; i < 32; i++) {
- a = (i * (gr->tpc_total - 1)) / 32;
- if (a != b) {
- b = a;
- do {
- gpc = (gpc + 1) % gr->gpc_nr;
- } while (!tpcnr[gpc]);
- tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
-
- tpc_set |= 1ULL << ((gpc * 8) + tpc);
+ u32 fbps = nvkm_rd32(device, 0x121c74);
+ if (fbps == 1)
+ nvkm_mask(device, 0x17e91c, 0x001f0000, 0x00090000);
+}
+
+static const u32
+gf100_grctx_alpha_beta_map[17][32] = {
+ [1] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ },
+ [2] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ },
+ //XXX: 3
+ [4] = {
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3, 3, 3,
+ },
+ //XXX: 5
+ //XXX: 6
+ [7] = {
+ 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3,
+ 4, 4, 4, 4, 4, 4,
+ 5, 5, 5, 5, 5, 5,
+ 6, 6, 6, 6,
+ },
+ [8] = {
+ 1, 1, 1,
+ 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3,
+ 4, 4, 4, 4, 4, 4,
+ 5, 5, 5, 5, 5,
+ 6, 6, 6, 6, 6,
+ 7, 7, 7,
+ },
+ //XXX: 9
+ //XXX: 10
+ [11] = {
+ 1, 1,
+ 2, 2, 2, 2,
+ 3, 3, 3,
+ 4, 4, 4, 4,
+ 5, 5, 5,
+ 6, 6, 6,
+ 7, 7, 7, 7,
+ 8, 8, 8,
+ 9, 9, 9, 9,
+ 10, 10,
+ },
+ //XXX: 12
+ //XXX: 13
+ [14] = {
+ 1, 1,
+ 2, 2,
+ 3, 3, 3,
+ 4, 4, 4,
+ 5, 5,
+ 6, 6, 6,
+ 7, 7,
+ 8, 8, 8,
+ 9, 9,
+ 10, 10, 10,
+ 11, 11, 11,
+ 12, 12,
+ 13, 13,
+ },
+ [15] = {
+ 1, 1,
+ 2, 2,
+ 3, 3,
+ 4, 4, 4,
+ 5, 5,
+ 6, 6, 6,
+ 7, 7,
+ 8, 8,
+ 9, 9, 9,
+ 10, 10,
+ 11, 11, 11,
+ 12, 12,
+ 13, 13,
+ 14, 14,
+ },
+ [16] = {
+ 1, 1,
+ 2, 2,
+ 3, 3,
+ 4, 4,
+ 5, 5,
+ 6, 6, 6,
+ 7, 7,
+ 8, 8,
+ 9, 9,
+ 10, 10, 10,
+ 11, 11,
+ 12, 12,
+ 13, 13,
+ 14, 14,
+ 15, 15,
+ },
+};
+
+void
+gf100_grctx_generate_alpha_beta_tables(struct gf100_gr *gr)
+{
+ struct nvkm_subdev *subdev = &gr->base.engine.subdev;
+ struct nvkm_device *device = subdev->device;
+ int i, gpc;
+
+ for (i = 0; i < 32; i++) {
+ u32 atarget = gf100_grctx_alpha_beta_map[gr->tpc_total][i];
+ u32 abits[GPC_MAX] = {}, amask = 0, bmask = 0;
+
+ if (!atarget) {
+ nvkm_warn(subdev, "missing alpha/beta mapping table\n");
+ atarget = max_t(u32, gr->tpc_total * i / 32, 1);
+ }
+
+ while (atarget) {
+ for (gpc = 0; atarget && gpc < gr->gpc_nr; gpc++) {
+ if (abits[gpc] < gr->tpc_nr[gpc]) {
+ abits[gpc]++;
+ atarget--;
+ }
+ }
}
- nvkm_wr32(device, 0x406800 + (i * 0x20), lower_32_bits(tpc_set));
- nvkm_wr32(device, 0x406c00 + (i * 0x20), lower_32_bits(tpc_set ^ tpc_mask));
- if (gr->gpc_nr > 4) {
- nvkm_wr32(device, 0x406804 + (i * 0x20), upper_32_bits(tpc_set));
- nvkm_wr32(device, 0x406c04 + (i * 0x20), upper_32_bits(tpc_set ^ tpc_mask));
+ for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
+ u32 bbits = gr->tpc_nr[gpc] - abits[gpc];
+ amask |= ((1 << abits[gpc]) - 1) << (gpc * 8);
+ bmask |= ((1 << bbits) - 1) << abits[gpc] << (gpc * 8);
}
+
+ nvkm_wr32(device, 0x406800 + (i * 0x20), amask);
+ nvkm_wr32(device, 0x406c00 + (i * 0x20), bmask);
}
}
void
+gf100_grctx_generate_tpc_nr(struct gf100_gr *gr, int gpc)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+ nvkm_wr32(device, GPC_UNIT(gpc, 0x0c08), gr->tpc_nr[gpc]);
+ nvkm_wr32(device, GPC_UNIT(gpc, 0x0c8c), gr->tpc_nr[gpc]);
+}
+
+void
+gf100_grctx_generate_sm_id(struct gf100_gr *gr, int gpc, int tpc, int sm)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+ nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x698), sm);
+ nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x4e8), sm);
+ nvkm_wr32(device, GPC_UNIT(gpc, 0x0c10 + tpc * 4), sm);
+ nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), sm);
+}
+
+void
+gf100_grctx_generate_floorsweep(struct gf100_gr *gr)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+ const struct gf100_grctx_func *func = gr->func->grctx;
+ int gpc, sm, i, j;
+ u32 data;
+
+ for (sm = 0; sm < gr->sm_nr; sm++) {
+ func->sm_id(gr, gr->sm[sm].gpc, gr->sm[sm].tpc, sm);
+ if (func->tpc_nr)
+ func->tpc_nr(gr, gr->sm[sm].gpc);
+ }
+
+ for (gpc = 0, i = 0; i < 4; i++) {
+ for (data = 0, j = 0; j < 8 && gpc < gr->gpc_nr; j++, gpc++)
+ data |= gr->tpc_nr[gpc] << (j * 4);
+ nvkm_wr32(device, 0x406028 + (i * 4), data);
+ nvkm_wr32(device, 0x405870 + (i * 4), data);
+ }
+
+ if (func->r4060a8)
+ func->r4060a8(gr);
+
+ func->rop_mapping(gr);
+
+ if (func->alpha_beta_tables)
+ func->alpha_beta_tables(gr);
+ if (func->max_ways_evict)
+ func->max_ways_evict(gr);
+ if (func->dist_skip_table)
+ func->dist_skip_table(gr);
+ if (func->r406500)
+ func->r406500(gr);
+ if (func->gpc_tpc_nr)
+ func->gpc_tpc_nr(gr);
+ if (func->r419f78)
+ func->r419f78(gr);
+ if (func->tpc_mask)
+ func->tpc_mask(gr);
+ if (func->smid_config)
+ func->smid_config(gr);
+}
+
+void
gf100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
{
struct nvkm_device *device = gr->base.engine.subdev.device;
@@ -1239,29 +1374,63 @@ gf100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
nvkm_mc_unk260(device, 0);
- gf100_gr_mmio(gr, grctx->hub);
- gf100_gr_mmio(gr, grctx->gpc);
- gf100_gr_mmio(gr, grctx->zcull);
- gf100_gr_mmio(gr, grctx->tpc);
- gf100_gr_mmio(gr, grctx->ppc);
+ if (!gr->fuc_sw_ctx) {
+ gf100_gr_mmio(gr, grctx->hub);
+ gf100_gr_mmio(gr, grctx->gpc_0);
+ gf100_gr_mmio(gr, grctx->zcull);
+ gf100_gr_mmio(gr, grctx->gpc_1);
+ gf100_gr_mmio(gr, grctx->tpc);
+ gf100_gr_mmio(gr, grctx->ppc);
+ } else {
+ gf100_gr_mmio(gr, gr->fuc_sw_ctx);
+ }
+
+ gf100_gr_wait_idle(gr);
idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000);
- grctx->bundle(info);
grctx->pagepool(info);
+ grctx->bundle(info);
grctx->attrib(info);
+ if (grctx->patch_ltc)
+ grctx->patch_ltc(info);
grctx->unkn(gr);
- gf100_grctx_generate_tpcid(gr);
- gf100_grctx_generate_r406028(gr);
- gf100_grctx_generate_r4060a8(gr);
- gf100_grctx_generate_r418bb8(gr);
- gf100_grctx_generate_r406800(gr);
+ gf100_grctx_generate_floorsweep(gr);
+
+ gf100_gr_wait_idle(gr);
+
+ if (grctx->r400088) grctx->r400088(gr, false);
+ if (gr->fuc_bundle)
+ gf100_gr_icmd(gr, gr->fuc_bundle);
+ else
+ gf100_gr_icmd(gr, grctx->icmd);
+ if (grctx->sw_veid_bundle_init)
+ gf100_gr_icmd(gr, grctx->sw_veid_bundle_init);
+ if (grctx->r400088) grctx->r400088(gr, true);
- gf100_gr_icmd(gr, grctx->icmd);
nvkm_wr32(device, 0x404154, idle_timeout);
- gf100_gr_mthd(gr, grctx->mthd);
+
+ if (gr->fuc_method)
+ gf100_gr_mthd(gr, gr->fuc_method);
+ else
+ gf100_gr_mthd(gr, grctx->mthd);
nvkm_mc_unk260(device, 1);
+
+ if (grctx->r419cb8)
+ grctx->r419cb8(gr);
+ if (grctx->r418800)
+ grctx->r418800(gr);
+ if (grctx->r419eb0)
+ grctx->r419eb0(gr);
+ if (grctx->r419e00)
+ grctx->r419e00(gr);
+ if (grctx->r418e94)
+ grctx->r418e94(gr);
+ if (grctx->r419a3c)
+ grctx->r419a3c(gr);
+ if (grctx->r408840)
+ grctx->r408840(gr);
}
#define CB_RESERVED 0x80000
@@ -1280,6 +1449,32 @@ gf100_grctx_generate(struct gf100_gr *gr)
int ret, i;
u64 addr;
+ /* NV_PGRAPH_FE_PWR_MODE_FORCE_ON. */
+ nvkm_wr32(device, 0x404170, 0x00000012);
+ nvkm_msec(device, 2000,
+ if (!(nvkm_rd32(device, 0x404170) & 0x00000010))
+ break;
+ );
+
+ if (grctx->unkn88c)
+ grctx->unkn88c(gr, true);
+
+ /* Reset FECS. */
+ nvkm_wr32(device, 0x409614, 0x00000070);
+ nvkm_usec(device, 10, NVKM_DELAY);
+ nvkm_mask(device, 0x409614, 0x00000700, 0x00000700);
+ nvkm_usec(device, 10, NVKM_DELAY);
+ nvkm_rd32(device, 0x409614);
+
+ if (grctx->unkn88c)
+ grctx->unkn88c(gr, false);
+
+ /* NV_PGRAPH_FE_PWR_MODE_AUTO. */
+ nvkm_wr32(device, 0x404170, 0x00000010);
+
+ /* Init SCC RAM. */
+ nvkm_wr32(device, 0x40802c, 0x00000001);
+
/* Allocate memory to for a "channel", which we'll use to generate
* the default context values.
*/
@@ -1392,7 +1587,8 @@ gf100_grctx = {
.main = gf100_grctx_generate_main,
.unkn = gf100_grctx_generate_unkn,
.hub = gf100_grctx_pack_hub,
- .gpc = gf100_grctx_pack_gpc,
+ .gpc_0 = gf100_grctx_pack_gpc_0,
+ .gpc_1 = gf100_grctx_pack_gpc_1,
.zcull = gf100_grctx_pack_zcull,
.tpc = gf100_grctx_pack_tpc,
.icmd = gf100_grctx_pack_icmd,
@@ -1404,4 +1600,11 @@ gf100_grctx = {
.attrib = gf100_grctx_generate_attrib,
.attrib_nr_max = 0x324,
.attrib_nr = 0x218,
+ .sm_id = gf100_grctx_generate_sm_id,
+ .tpc_nr = gf100_grctx_generate_tpc_nr,
+ .r4060a8 = gf100_grctx_generate_r4060a8,
+ .rop_mapping = gf100_grctx_generate_rop_mapping,
+ .alpha_beta_tables = gf100_grctx_generate_alpha_beta_tables,
+ .max_ways_evict = gf100_grctx_generate_max_ways_evict,
+ .r419cb8 = gf100_grctx_generate_r419cb8,
};